diff --git "a/measurement_Unholy-v1-12L-13B.json" "b/measurement_Unholy-v1-12L-13B.json" new file mode 100644--- /dev/null +++ "b/measurement_Unholy-v1-12L-13B.json" @@ -0,0 +1,128960 @@ +{ + "measurement": [ + { + "key": "model.layers.0.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.019502714276313782, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.019353142008185387, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.010185332968831062, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.01015306543558836, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0101212989538908, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.00643075630068779, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.030101288110017776, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.01924680732190609, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.010139452293515205, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.010099693201482296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.010371269658207893, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.011674393899738789, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.010089932009577751, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.007293194532394409, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.006414355244487524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.007283508777618408, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.006407686974853277, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0061303419061005116, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.006406545173376799, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.006128484848886728, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006249666213989258, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0064061544835567474, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005915721412748098, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006127706728875637, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.010185332968831062, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005915721412748098, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.021776949986815453, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.021526699885725975, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.010290578007698059, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.010233818553388119, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.010177737101912498, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004902384709566832, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.026752084493637085, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.02137369103729725, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.010217340663075447, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.010133138857781887, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.010442692786455154, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.011175183579325676, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.010114787146449089, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.006189414765685797, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.004856748040765524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.006146789528429508, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.004835885018110275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.004351632669568062, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.00483237998560071, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.004346549045294523, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.004447849467396736, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.004831377416849136, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0037786581087857485, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004343811888247728, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.010290578007698059, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0037786581087857485, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.06694409251213074, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.05523550137877464, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.04267074167728424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.036370109766721725, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.02992953360080719, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.02067379839718342, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05731692537665367, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04365357756614685, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0320357009768486, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.024580154567956924, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.025751054286956787, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030150165781378746, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0214679092168808, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.014964689500629902, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.012965056113898754, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015313937328755856, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.008514894172549248, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.007612895220518112, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.007598293013870716, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.006418805103749037, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007935202680528164, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007105659227818251, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004495661240071058, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005221928004175425, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.014964689500629902, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004495661240071058, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.02685842290520668, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.015953613445162773, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.009196809493005276, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.01038376521319151, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.010014389641582966, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004395873285830021, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.01708478480577469, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.015120654366910458, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.012700216844677925, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.007642084266990423, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.008119450882077217, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.008710644207894802, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.00750815449282527, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.005394089035689831, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.004769904538989067, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0045114983804523945, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0036416687071323395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.003510497510433197, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0033443090505898, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0030502916779369116, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0027569737285375595, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0033298812340945005, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.002171405591070652, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.002922768471762538, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.015953613445162773, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004395873285830021, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.07995244115591049, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.07332981377840042, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.07114085555076599, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.06338576972484589, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.03627496585249901, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.034023530781269073, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.041060950607061386, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.03786436840891838, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.03693453595042229, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.03197528421878815, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.030084235593676567, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.02098056674003601, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.018330371007323265, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.017628293484449387, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.017463041469454765, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.010537274181842804, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.009696727618575096, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.009641231037676334, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.008930766023695469, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.008831976912915707, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.005984672345221043, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.006754843983799219, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.005751739721745253, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.005406379699707031, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.017628293484449387, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.005406379699707031, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.12141083925962448, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.11151915043592453, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.10828761011362076, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.0965210571885109, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.055006641894578934, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0516614094376564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.062139738351106644, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.05722609907388687, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.056021180003881454, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.04843413457274437, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.04559837654232979, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.031514763832092285, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.02723599039018154, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0262614618986845, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.02603812888264656, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.015663830563426018, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.013706706464290619, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.013630283065140247, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.012442859821021557, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0122915618121624, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.008425607345998287, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.008661924861371517, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.008080851286649704, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.006094341166317463, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.015663830563426018, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.006094341166317463, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.08066724985837936, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.06631405651569366, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.0469575859606266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.0419265478849411, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.03368525952100754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.02320379577577114, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.06167494133114815, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.05411214753985405, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.036206766963005066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.028389284387230873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.028162339702248573, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.027916645631194115, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0248769111931324, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.017392924055457115, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.015972241759300232, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.014853589236736298, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.011984538286924362, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.010544529184699059, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.01128218974918127, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.009557624347507954, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.010590679943561554, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.010850520804524422, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.008661006577312946, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.008607401512563229, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.017392924055457115, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.008607401512563229, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.01613951288163662, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.012695513665676117, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.009580633603036404, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.008409535512328148, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.006920523941516876, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004527838435024023, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.012842521071434021, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.010103050619363785, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.007574295159429312, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.005638655740767717, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.006137427408248186, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.006860647350549698, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.005003480706363916, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.003455582307651639, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0029643522575497627, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.003514473093673587, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.00193109386600554, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0017060221871361136, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.001712726429104805, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0014048628509044647, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0018477228004485369, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.001607644953764975, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0010887224925681949, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.001130798365920782, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.01613951288163662, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004527838435024023, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.01554101426154375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.011996741406619549, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.00905353482812643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.007933019660413265, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.006497369613498449, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004324785899370909, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.01081411074846983, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.009482281282544136, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.007192482240498066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.005187378730624914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.005355264991521835, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.00553043931722641, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.004562002141028643, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.003224211512133479, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.002833558013662696, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.002756648464128375, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0018060781294479966, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0016389882657676935, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0015817544190213084, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0013355410192161798, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0014838110655546188, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0014779887860640883, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0010081257205456495, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.001067612087354064, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.01554101426154375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004324785899370909, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09225683659315109, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.07650463283061981, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06953474134206772, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05927079916000366, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.041277091950178146, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03461810201406479, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.052133191376924515, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04765782877802849, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04359252750873566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03264407441020012, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.030598608776926994, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026502856984734535, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.022851642221212387, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.019957492128014565, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.019211146980524063, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013389005325734615, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010600204579532146, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010322533547878265, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.008951741270720959, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.008447238244116306, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007083104457706213, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007235308643430471, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006072594318538904, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005058919545263052, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013389005325734615, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005058919545263052, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.16971230506896973, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10441853106021881, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06507138162851334, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07242468744516373, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07143048942089081, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.034768059849739075, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1068311259150505, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09591196477413177, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08198586106300354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04821049049496651, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0519164502620697, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.055827546864748, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04745433107018471, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03646122291684151, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03338807448744774, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02860349975526333, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02198338694870472, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.021075844764709473, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.018474241718649864, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.016634950414299965, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016507333144545555, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01837475784122944, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013152080588042736, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015134225599467754, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.016634950414299965, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013152080588042736, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.13564492762088776, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.12706157565116882, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.12444227188825607, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.11219815164804459, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.060951586812734604, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.05838937684893608, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.06777848303318024, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.06242242082953453, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.061717092990875244, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.05512670427560806, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.052271343767642975, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.03449099883437157, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.02984767034649849, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.029213471338152885, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.029064510017633438, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.017219142988324165, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.015621560625731945, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.015580179169774055, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.014567007310688496, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.014474491588771343, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.00961560383439064, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01032732892781496, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.009418144822120667, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.007894185371696949, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.017219142988324165, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.007894185371696949, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1634356677532196, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1535450965166092, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15061958134174347, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1361865997314453, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07352878898382187, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07057260721921921, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08165153115987778, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07521339505910873, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07443749159574509, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.06669565290212631, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06322693824768066, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04108785465359688, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.035524312406778336, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.034792881458997726, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.034626007080078125, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.020337888970971107, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.017678653821349144, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.01762828417122364, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.016325069591403008, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01621144823729992, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01063347514718771, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.010420975275337696, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.010386615060269833, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00658376095816493, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.017678653821349144, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00658376095816493, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.16792212426662445, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.150234192609787, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.13670608401298523, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.123084656894207, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.07860676944255829, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.06379637122154236, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.09789422154426575, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.09347793459892273, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.08107028901576996, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.07039716839790344, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.06831952184438705, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.05370372533798218, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.050873566418886185, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.03684667870402336, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.03446180000901222, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.02412908896803856, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.020791538059711456, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.019627351313829422, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.01933138631284237, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.017823902890086174, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.013921918347477913, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.016059165820479393, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.012063037604093552, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.0122373690828681, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.017823902890086174, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.012063037604093552, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.03595511242747307, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.03257707506418228, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.03109763376414776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.02739560604095459, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.01631046086549759, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.014944273047149181, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.020010940730571747, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.01769433729350567, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.016704510897397995, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.014152871444821358, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.013399839401245117, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.01027220580726862, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.008498958311975002, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.007831248454749584, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.007661719806492329, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.005160695873200893, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.004083961248397827, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0040091718547046185, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.003666904289275408, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0035568007733672857, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0027401272673159838, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.002644933294504881, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0024059319403022528, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0018123446498066187, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.01769433729350567, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0040091718547046185, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.029203245416283607, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.026386892423033714, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.025171492248773575, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0221394132822752, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.013196024112403393, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.012092231772840023, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.015774378553032875, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.014331281185150146, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.013525865972042084, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.011407211422920227, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.010715799406170845, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.007978536188602448, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.006827556528151035, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.006321385502815247, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.006197794806212187, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.003977842628955841, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.003290815744549036, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.003240643534809351, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0029446736443787813, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0028658881783485413, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0021178682800382376, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0021174445282667875, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0019298329716548324, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0014481941470876336, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.015774378553032875, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.003977842628955841, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1408979743719101, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12970302999019623, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1260545700788498, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11232681572437286, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06492443382740021, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06101230904459953, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07325630635023117, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06750820577144623, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06616123020648956, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05711318179965019, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.053648944944143295, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03717277944087982, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03214108571410179, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03098277561366558, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.030708929523825645, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018498104065656662, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015792271122336388, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015707680955529213, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014205701649188995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014019312337040901, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00958424061536789, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009407171979546547, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009159398265182972, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005916904658079147, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015792271122336388, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005916904658079147, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18704867362976074, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.14864599704742432, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13388295471668243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10479527711868286, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08367615193128586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.068878673017025, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1053638681769371, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09537272900342941, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08943136036396027, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06037385016679764, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05571495369076729, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.054629504680633545, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04674943909049034, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.041557587683200836, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04026998206973076, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02797609567642212, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023694131523370743, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023343661800026894, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.019766058772802353, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018947547301650047, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016457483172416687, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017784524708986282, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014940053224563599, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014514257200062275, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016457483172416687, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014514257200062275, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1794603317975998, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16894960403442383, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16581906378269196, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1507737785577774, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08255647867918015, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07924202084541321, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09167131781578064, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08441844582557678, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08355327695608139, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07501491159200668, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07168804854154587, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04670361801981926, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04026234894990921, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.039450641721487045, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03926027938723564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023203270509839058, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02059883065521717, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02054876834154129, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019161589443683624, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019042490050196648, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012667955830693245, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01297715213149786, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012406283058226109, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009268025867640972, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012667955830693245, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009268025867640972, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21134503185749054, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1992684006690979, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19567610323429108, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17810304462909698, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09737438708543777, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09353325515985489, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10822556167840958, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09955229610204697, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09856566041707993, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0885736495256424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08466500788927078, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05484674125909805, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04726121574640274, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04630788043141365, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04608774557709694, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027150560170412064, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023599589243531227, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023540930822491646, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021848993375897408, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021698851138353348, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014409655705094337, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014026559889316559, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014097237959504128, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009001570753753185, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014409655705094337, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009001570753753185, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.22443902492523193, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2026541531085968, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.1946096420288086, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17507174611091614, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10261479765176773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09446589648723602, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12002002447843552, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1099037304520607, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10580090433359146, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09007977694272995, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08566317707300186, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.061285704374313354, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.052584338933229446, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.04931271821260452, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.048525821417570114, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.030535442754626274, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.025898152962327003, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.025726202875375748, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023486487567424774, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.022975115105509758, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01676413044333458, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.016826486214995384, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01571514457464218, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011891788803040981, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01676413044333458, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011891788803040981, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.051734279841184616, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.04736781865358353, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.04557359218597412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0405646488070488, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.023922152817249298, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.022245898842811584, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.028791381046175957, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.025533823296427727, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.024413958191871643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.021010389551520348, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.01967090182006359, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.014853534288704395, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.012281946837902069, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.011574112810194492, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.011408731341362, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.007472939323633909, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.006021751090884209, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.005953386425971985, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0054570529609918594, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.005349780432879925, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0039639221504330635, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.003786221146583557, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.003741875058040023, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.002550543984398246, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.014853534288704395, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.003741875058040023, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.041892558336257935, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.038266416639089584, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.03662069886922836, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.032449834048748016, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.019188271835446358, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.01770731434226036, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.022829847410321236, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.020788898691534996, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.01960219256579876, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.016758717596530914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.01585259847342968, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.01158427819609642, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.009924217127263546, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.009217760525643826, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.009047521278262138, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0057689775712788105, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.004831660073250532, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.004761710297316313, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.004366669803857803, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0042596799321472645, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.003095492022112012, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.003145747585222125, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.002858250169083476, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00220910063944757, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.016758717596530914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0042596799321472645, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18526868522167206, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17132578790187836, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16680990159511566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14913199841976166, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08613299578428268, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08114835619926453, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.09766995161771774, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.08939367532730103, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08767746388912201, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07598754018545151, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07170069962739944, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04954927787184715, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04260234534740448, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04113175719976425, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04077896475791931, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.024676470085978508, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.020953981205821037, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.020842064172029495, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01888185553252697, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018644161522388458, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.012839610688388348, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.012451251968741417, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.012284286320209503, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.007803868502378464, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.012839610688388348, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.007803868502378464, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1589740812778473, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1342145949602127, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12081886827945709, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09694942831993103, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07237175107002258, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06083143875002861, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.09994566440582275, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.08639266341924667, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07552643865346909, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05749809741973877, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05213610455393791, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05038529261946678, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04321937635540962, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03716172277927399, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.035520005971193314, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02646133303642273, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02271578274667263, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.022076399996876717, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020389055833220482, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.019476372748613358, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015950731933116913, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018490049988031387, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01398539450019598, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015887919813394547, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015950731933116913, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01398539450019598, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1933305561542511, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18151788413524628, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1779066026210785, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1615002602338791, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09074725955724716, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08678212016820908, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1009628102183342, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09297376871109009, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09197190403938293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.081906259059906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07804669439792633, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.051873333752155304, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.044851697981357574, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0438726581633091, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.043636590242385864, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025758294388651848, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023484298959374428, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02341805025935173, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02183476835489273, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021696873009204865, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014394897036254406, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015571174211800098, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014085604809224606, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01191339548677206, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014394897036254406, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01191339548677206, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23658064007759094, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22232060134410858, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21796734631061554, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19794750213623047, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11080920696258545, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10599317401647568, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1232740506529808, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11352503299713135, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11231068521738052, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10001390427350998, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09523782134056091, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06293515861034393, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05414898321032524, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05297413468360901, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05267753079533577, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031115099787712097, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027295127511024475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027218034490942955, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02516222558915615, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024977989494800568, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016746651381254196, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016704576089978218, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01636207476258278, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011303262785077095, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016746651381254196, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011303262785077095, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.0610799565911293, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.057314593344926834, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.03820909932255745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.034679729491472244, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.02588399313390255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.016656139865517616, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.050908204168081284, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.04613565281033516, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.026443740352988243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.023741718381643295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.023305287584662437, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.02220764569938183, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0201422106474638, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.008919104002416134, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.009417257271707058, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.00576309347525239, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.006070191506296396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.00486617349088192, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.0056487624533474445, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.004273546393960714, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.004886394366621971, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.00494688143953681, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.0030758872162550688, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.0027222398202866316, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.016656139865517616, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.004273546393960714, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.06656347960233688, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.06043606624007225, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.05789565667510033, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.051217835396528244, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03053809516131878, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.028137192130088806, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.035701289772987366, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.03281523659825325, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03127120062708855, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.026464683935046196, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.02494124136865139, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.018107665702700615, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.015633028000593185, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.014623376540839672, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.014382848516106606, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.008996925316751003, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0075757503509521484, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0074822017922997475, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.006777531001716852, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.006618044804781675, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.004747569561004639, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0047880723141133785, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004409360233694315, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.003215402364730835, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.018107665702700615, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004409360233694315, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.05910187587141991, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.05358445644378662, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.05127200484275818, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.04536684975028038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.027106359601020813, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.02492152899503708, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03195478022098541, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.029251867905259132, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.027757180854678154, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.023479605093598366, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.022192981094121933, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.016207976266741753, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.013940083794295788, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.012982415035367012, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.012750630266964436, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.008055703714489937, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0067148939706385136, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0066209048964083195, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.006003062706440687, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.005850840825587511, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.004244028124958277, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.004240227397531271, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00392310181632638, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0028230883181095123, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.016207976266741753, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00392310181632638, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1932598501443863, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17809578776359558, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17304442822933197, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15446113049983978, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09011735767126083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08459945023059845, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10200043022632599, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0938233882188797, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09183334559202194, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0791337713599205, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07443583756685257, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.051776766777038574, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04471895843744278, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04307360202074051, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04266589879989624, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02577391266822815, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02193542569875717, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.021812384948134422, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01968211680650711, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.019419606775045395, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013363778591156006, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01305766124278307, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.012750761583447456, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008158217184245586, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013363778591156006, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008158217184245586, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20521795749664307, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17581284046173096, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1629130244255066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13669860363006592, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09609729051589966, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08201678097248077, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11797475069761276, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10744881629943848, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09981133043766022, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07646546512842178, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06986338645219803, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.061521824449300766, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.052639152854681015, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.047430288046598434, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04613296687602997, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03123941831290722, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02635003998875618, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02586785890161991, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022811641916632652, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021999621763825417, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017851265147328377, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019057603552937508, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01629484072327614, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015055988915264606, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017851265147328377, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015055988915264606, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20698924362659454, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1940430849790573, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19007009267807007, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17204917967319489, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0971660241484642, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09269250184297562, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10806291550397873, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09964577108621597, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09853541105985641, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08724873512983322, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08281033486127853, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05507979542016983, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.047534555196762085, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04641261324286461, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04614894837141037, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027178913354873657, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023874757811427116, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023808013647794724, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021908676251769066, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02173621393740177, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014505529776215553, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014569120481610298, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014134975150227547, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009754578582942486, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014505529776215553, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009754578582942486, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24896030128002167, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23347368836402893, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2287561148405075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20720255374908447, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11683011054992676, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11151974648237228, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13001935184001923, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1197952851653099, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11849323660135269, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10497545450925827, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09962011873722076, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06600601226091385, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.056997884064912796, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05566927418112755, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.055354923009872437, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03254830092191696, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028243442997336388, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02816116251051426, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025826213881373405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025614166632294655, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017090853303670883, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0166284441947937, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01664457842707634, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010311617515981197, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017090853303670883, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010311617515981197, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2638694643974304, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2348061054944992, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22432087361812592, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1977798044681549, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12232980877161026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11114747822284698, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14285625517368317, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.13120205700397491, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12649498879909515, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.1040540337562561, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09755805134773254, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07318449020385742, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06282009929418564, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05882382020354271, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05786433815956116, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03627583011984825, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03078301250934601, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.030609190464019775, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02723800018429756, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.026587728410959244, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.019810019060969353, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01987420581281185, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.018512150272727013, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013817197643220425, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013817197643220425, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013817197643220425, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.07599250227212906, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.06904259324073792, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06602618843317032, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.058454085141420364, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03499120846390724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03217219561338425, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04102076590061188, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.037794750183820724, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.035812199115753174, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03031402826309204, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.028528979048132896, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.020814476534724236, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.018051618710160255, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.016786891967058182, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.016485434025526047, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.010355028323829174, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.00876662228256464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.00864172913134098, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.007862849161028862, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.007665565703064203, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00550671573728323, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0056505752727389336, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005090042948722839, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0038976639043539762, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.018051618710160255, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0038976639043539762, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.06645435839891434, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.06032627820968628, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.05756651237607002, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05094799026846886, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.030561938881874084, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.028045687824487686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03631845861673355, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.03326062485575676, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.031308118253946304, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.026514364406466484, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.025072604417800903, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.01843605563044548, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.015872707590460777, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01467017363756895, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.014381992630660534, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.009167616255581379, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.007631477899849415, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.007505790796130896, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.006835118867456913, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.006646013353019953, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.004849474411457777, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.004893200006335974, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004450574051588774, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0033140808809548616, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.015872707590460777, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004450574051588774, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20099501311779022, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1851547211408615, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17979778349399567, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16043202579021454, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09387461841106415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08803150802850723, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10653600841760635, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09788724035024643, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09569426625967026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08231912553310394, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07736969739198685, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.054102350026369095, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04668005555868149, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.044869426637887955, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04443595930933952, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.026942897588014603, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.022883068770170212, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.022735247388482094, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020509742200374603, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020224694162607193, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0139981210231781, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01365765929222107, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01332360040396452, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008557635359466076, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0139981210231781, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008557635359466076, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2066139280796051, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18035544455051422, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17032933235168457, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14057692885398865, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09677179902791977, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08623162657022476, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11518148332834244, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1047283336520195, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09990999102592468, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07764582335948944, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06896103173494339, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05958227068185806, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05107997730374336, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04749966412782669, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04663456976413727, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030217204242944717, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026156676933169365, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025861162692308426, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02268691174685955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022137442603707314, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017247051000595093, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018431004136800766, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01617301069200039, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014531484805047512, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017247051000595093, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014531484805047512, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19329357147216797, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1806391477584839, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1767614185810089, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15975169837474823, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09098335355520248, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08657683432102203, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1014532819390297, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09353556483983994, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09238194674253464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08140712976455688, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0771360993385315, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.051850464195013046, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04471614956855774, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.043574851006269455, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04330753907561302, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025597071275115013, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022602571174502373, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022528400644659996, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020722828805446625, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02054995857179165, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01384067814797163, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014087090268731117, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013465434312820435, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009765575639903545, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01384067814797163, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009765575639903545, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2542315423488617, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23777233064174652, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23282161355018616, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2104838341474533, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11962778866291046, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11385245621204376, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13349710404872894, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12286122888326645, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12141325324773788, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10703665018081665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10147006809711456, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06784306466579437, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05850018933415413, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.057029228657484055, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05669453740119934, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033465415239334106, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0289553701877594, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028861284255981445, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026395311579108238, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02616235427558422, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017652947455644608, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017107805237174034, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017155954614281654, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010643191635608673, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017652947455644608, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010643191635608673, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2652765214443207, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2357151359319687, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2249867171049118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19794343411922455, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12324967235326767, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11168795824050903, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1444019377231598, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1323097050189972, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12737590074539185, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10450871288776398, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09773392975330353, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07408274710178375, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06351093202829361, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.059411097317934036, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.058415282517671585, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03684551641345024, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03130943700671196, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.031122220680117607, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.027704745531082153, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.027048775926232338, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02043955586850643, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.020503440871834755, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01912028156220913, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01460016518831253, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01460016518831253, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01460016518831253, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09299805015325546, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08544305711984634, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08239147067070007, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07344112545251846, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.043224792927503586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04024779424071312, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0499519407749176, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04596078768372536, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04411008208990097, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03789275512099266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03571531921625137, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0254090316593647, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.021965842694044113, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02076530270278454, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.020485635846853256, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012637867592275143, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010822322219610214, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010709378868341446, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009766592644155025, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009585699066519737, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0067383162677288055, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0068811411038041115, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006336021702736616, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004760073963552713, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012637867592275143, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004760073963552713, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.07786927372217178, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0715966448187828, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06891442090272903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.06145477294921875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03619804605841637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03365888074040413, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04228832200169563, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.03869399055838585, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.036922261118888855, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03175754472613335, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.030080392956733704, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.021496174857020378, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.018469419330358505, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.017360849305987358, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.017089931294322014, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.010683796368539333, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.00898060854524374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.008870777674019337, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.008099334314465523, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.007928517647087574, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005659778602421284, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005647660698741674, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0052953786216676235, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0037802786100655794, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.017360849305987358, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0037802786100655794, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2180817574262619, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2017214149236679, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19633746147155762, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1757294088602066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10219325125217438, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09619144350290298, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11554546654224396, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10619654506444931, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10402988642454147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09009609371423721, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0848269909620285, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05872245505452156, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.050645213574171066, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04882971569895744, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.048398833721876144, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029185190796852112, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024844394996762276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024709155783057213, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02234971523284912, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022069117054343224, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0151046859100461, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014717789366841316, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01443067379295826, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009117309004068375, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0151046859100461, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009117309004068375, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20967291295528412, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17420558631420135, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15973860025405884, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1325288861989975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09671574085950851, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08135061711072922, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11915174871683121, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10838238149881363, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10101699829101562, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07433924823999405, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06673922389745712, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.061524420976638794, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05276504158973694, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04748430475592613, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04620030149817467, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03091432712972164, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026164287701249123, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02571653388440609, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022133149206638336, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02128380723297596, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017384717240929604, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018713442608714104, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015766240656375885, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014582822099328041, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017384717240929604, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014582822099328041, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18590597808361053, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17361722886562347, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1697765588760376, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15322111546993256, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08758403360843658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0831703469157219, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09775664657354355, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0901535376906395, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08892188221216202, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07815291732549667, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07400805503129959, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04983581230044365, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.043006714433431625, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04184582829475403, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04157569631934166, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024570876732468605, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021480759605765343, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021402986720204353, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01960236206650734, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01942150481045246, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013093038462102413, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013075408525764942, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012702261097729206, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008654268458485603, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013093038462102413, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008654268458485603, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.25534385442733765, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23866048455238342, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23353636264801025, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2110106199979782, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12035737186670303, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1143827810883522, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13442718982696533, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12373534590005875, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12216687202453613, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10749325901269913, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.1018969938158989, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0683806911110878, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.058956246823072433, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.057403940707445145, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05705447122454643, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03372947499155998, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02917099930346012, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02906898967921734, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026556948199868202, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026316804811358452, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01781638339161873, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017273476347327232, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017290517687797546, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01078164391219616, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01781638339161873, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01078164391219616, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2699310779571533, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23951978981494904, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2283283919095993, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.2010485678911209, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12538421154022217, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11341597884893417, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14733706414699554, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.13493694365024567, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12967811524868011, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.1062348261475563, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0994829535484314, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07544989883899689, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06476398557424545, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06044265627861023, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05940253287553787, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03746774420142174, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.031846191734075546, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03164182975888252, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02817412093281746, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02747277356684208, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.020649507641792297, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.02087719365954399, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.019252032041549683, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014828658662736416, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014828658662736416, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014828658662736416, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09103915840387344, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0837659016251564, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08078174293041229, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07203146070241928, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.042336106300354004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03941952809691429, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.048969220370054245, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04506491869688034, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04315777122974396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.037158023566007614, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03505641222000122, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.024903446435928345, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.021505162119865417, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.020288726314902306, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.019993215799331665, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012361112050712109, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010477985255420208, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010356982238590717, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009450709447264671, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.00925766583532095, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006537216249853373, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006555173546075821, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006121268030256033, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004371119663119316, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012361112050712109, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004371119663119316, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.07720901817083359, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.07113485783338547, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06836249679327011, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.06101653724908829, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03589179739356041, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03332240507006645, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04194594547152519, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.038562025874853134, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.036588337272405624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03155263885855675, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.029859958216547966, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.021312979981303215, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.018400179222226143, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.017194019630551338, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.01690819300711155, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.010575154796242714, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.008879050612449646, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.008751940913498402, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.008016553707420826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.007825707085430622, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0055791777558624744, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005572329740971327, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005178356077522039, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0036803053226321936, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.017194019630551338, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0036803053226321936, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2128356695175171, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19701161980628967, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19169622659683228, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1716049462556839, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09979542344808578, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09393572062253952, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11328785866498947, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10378672927618027, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10159440338611603, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0879979282617569, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08299902826547623, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.057626884430646896, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.049511224031448364, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04768994823098183, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.047248855233192444, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02864537574350834, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024272551760077477, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02413034997880459, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021839536726474762, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021549750119447708, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014837413094937801, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014403358101844788, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014120667241513729, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008931898511946201, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014837413094937801, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008931898511946201, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2282877415418625, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1975625604391098, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.186166450381279, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15414828062057495, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10575218498706818, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09347844123840332, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12713934481143951, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1155523806810379, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11026103794574738, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08537446707487106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07610445469617844, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06563413143157959, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.056463368237018585, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.051995325833559036, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05090222507715225, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03307914733886719, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02883376181125641, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02850533090531826, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025174185633659363, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024487582966685295, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01887483336031437, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02061515673995018, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017542008310556412, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016380207613110542, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017542008310556412, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016380207613110542, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17346470057964325, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16197852790355682, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1582837998867035, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1429702788591385, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0818261131644249, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07769332081079483, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09162703156471252, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08436312526464462, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08309026062488556, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07302870601415634, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06922516971826553, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.046733956784009933, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.040329474955797195, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03918760269880295, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.038918476551771164, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023090209811925888, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020255744457244873, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020173843950033188, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01851213537156582, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018336815759539604, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012459280900657177, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012536324560642242, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012085403315722942, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008549493737518787, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012459280900657177, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008549493737518787, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2500482499599457, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23370762169361115, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22857621312141418, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.206643208861351, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11822768300771713, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11232224851846695, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.132169708609581, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1216544657945633, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12001238018274307, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10554679483175278, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10023844242095947, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06748008728027344, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05809319019317627, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.056554991751909256, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05619499087333679, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033295419067144394, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029156846925616264, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.029052764177322388, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02664157748222351, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026408091187477112, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.0178559347987175, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017903335392475128, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01733587495982647, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012045110575854778, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.0178559347987175, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012045110575854778, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2634498178958893, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2340482771396637, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2231612652540207, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1961514949798584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12224570661783218, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11072831600904465, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14473260939121246, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.13161467015743256, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12643329799175262, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.1037321612238884, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09721439331769943, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07400386780500412, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06337551027536392, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0591987669467926, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05818409472703934, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03712083771824837, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03165239095687866, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03144855424761772, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.028168251737952232, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02751564420759678, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02112177200615406, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.021305883303284645, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01973387412726879, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015803232789039612, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015803232789039612, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015803232789039612, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10258330404758453, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09473257511854172, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09153013676404953, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08189063519239426, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04769308865070343, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0445391871035099, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05491827800869942, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0506172738969326, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.048582084476947784, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04208080843091011, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03967667743563652, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027897125110030174, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02417803928256035, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022878626361489296, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022562840953469276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0139318211004138, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011843406595289707, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011717654764652252, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010714792646467686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010513837449252605, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007325359620153904, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007370732259005308, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006882730405777693, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004929149057716131, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0139318211004138, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004929149057716131, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08472225815057755, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0782487690448761, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07538199424743652, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.06746470928192139, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.039380062371492386, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03667055442929268, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04573165997862816, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04213778302073479, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04009626433253288, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03477253392338753, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03286217898130417, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02321699820458889, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.020135972648859024, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01889151521027088, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.018590431660413742, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.011591753922402859, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.009775067679584026, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.009646344929933548, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.008845950476825237, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.008652712218463421, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006094546988606453, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0061047556810081005, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005688503384590149, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00404986971989274, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.011591753922402859, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00404986971989274, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2284904271364212, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21198318898677826, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2065194845199585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18539445102214813, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10714860260486603, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10104073584079742, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12149316072463989, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11129875481128693, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1089761033654213, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09487789869308472, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0895601436495781, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06181904301047325, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05310901626944542, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05122390389442444, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05076557397842407, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030834337696433067, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026075100526213646, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025931118056178093, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023536695167422295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023241018876433372, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0159128587692976, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015388420782983303, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015122882090508938, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009526546113193035, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0159128587692976, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009526546113193035, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21858690679073334, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19141347706317902, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18164637684822083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15576191246509552, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10216718912124634, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09135051816701889, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11929553002119064, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1095649003982544, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10531099885702133, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08392594009637833, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07539597898721695, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06196676567196846, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05370119959115982, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05038614198565483, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04956584796309471, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03153621777892113, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027959872037172318, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027713671326637268, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024652866646647453, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024168750271201134, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018206240609288216, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019866902381181717, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017256034538149834, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015935558825731277, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018206240609288216, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015935558825731277, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16860073804855347, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15769152343273163, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15414254367351532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1394461691379547, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07958431541919708, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07563051581382751, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08878982812166214, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08204882591962814, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08079376071691513, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07121072709560394, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0674237459897995, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04540371522307396, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.039401330053806305, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.038283269852399826, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03801850229501724, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.022650612518191338, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02000894583761692, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.01992763951420784, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018348457291722298, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01818229630589485, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012127715162932873, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012560570612549782, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011762178502976894, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008881033398211002, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01818229630589485, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008881033398211002, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24609780311584473, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2303810566663742, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22546620666980743, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20412211120128632, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11595296859741211, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11029684543609619, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12944483757019043, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11932196468114853, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11770457029342651, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10383540391921997, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09833697974681854, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06579770147800446, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05701937898993492, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05549774318933487, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05513335391879082, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03284986689686775, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028369680047035217, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028266990557312965, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0258762426674366, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025642002001404762, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017241433262825012, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01685616932809353, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016722967848181725, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010751252993941307, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017241433262825012, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010751252993941307, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2625364363193512, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23450739681720734, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22422106564044952, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19784103333950043, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12189900875091553, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11087937653064728, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14287398755550385, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.13084428012371063, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12585687637329102, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.1040787324309349, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09735670685768127, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07298880070447922, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06307240575551987, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05904104933142662, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05805402994155884, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.036678995937108994, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03142916411161423, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03123505413532257, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.027995288372039795, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02736477367579937, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02015194110572338, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.020822694525122643, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01884484849870205, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015233367681503296, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015233367681503296, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015233367681503296, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10679705440998077, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09868719428777695, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09537019580602646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08544135093688965, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0497986376285553, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04654979705810547, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.057366449385881424, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05286708101630211, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05071691796183586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0440271832048893, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.041605956852436066, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029240457341074944, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0254055205732584, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0240218173712492, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023691339418292046, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014642464928328991, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012676648795604706, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012546534650027752, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01155018713325262, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011346731334924698, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007835413329303265, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008226034231483936, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0073730358853936195, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005916327238082886, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014642464928328991, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005916327238082886, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09224271774291992, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08534076064825058, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08232640475034714, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07379739731550217, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04300611838698387, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04012174531817436, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04988107830286026, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04590941220521927, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04377215728163719, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.038036514073610306, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03602848947048187, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.025379328057169914, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.021940922364592552, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0206428375095129, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0203225240111351, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01267028134316206, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01069373544305563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01056558545678854, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009697840549051762, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009502014145255089, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006670853588730097, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006694813258945942, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006232716143131256, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004493875429034233, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01267028134316206, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004493875429034233, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2279096096754074, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21171076595783234, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20615024864673615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18529115617275238, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10697118192911148, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10091657936573029, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.121536985039711, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11124379932880402, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1088089868426323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09484297782182693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08957628160715103, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06182537600398064, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05313118174672127, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.051184218376874924, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.050720032304525375, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030818743631243706, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026119550690054893, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02595498040318489, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023605545982718468, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023298893123865128, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015962181612849236, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015531542710959911, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015182219445705414, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009733776561915874, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015962181612849236, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009733776561915874, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2379177063703537, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20404723286628723, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19066765904426575, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15810465812683105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10991513729095459, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.096082903444767, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13310925662517548, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12114979326725006, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11403949558734894, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08766394108533859, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07830118387937546, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06860530376434326, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05951801687479019, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.054446280002593994, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05320104584097862, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.034894511103630066, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030719086527824402, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.030286138877272606, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02682177722454071, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026048600673675537, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0202473271638155, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.022515803575515747, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01874796487390995, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.018313661217689514, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.018313661217689514, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.018313661217689514, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18119299411773682, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16941706836223602, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16562889516353607, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14986054599285126, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08554064482450485, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08126368373632431, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09544135630130768, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0882122740149498, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08682651817798615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07654765248298645, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07246124744415283, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.048755574971437454, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04232710227370262, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0411100760102272, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0408175103366375, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024309342727065086, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021403629332780838, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021319862455129623, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01960918866097927, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019433939829468727, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012935996055603027, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013336275704205036, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012527636252343655, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00929664634168148, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012935996055603027, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00929664634168148, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.25254788994789124, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23642456531524658, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23132474720478058, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20931418240070343, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11908435821533203, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11325197666883469, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13282105326652527, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12262263149023056, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1208607479929924, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10662286728620529, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10088106244802475, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0675458014011383, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.058548327535390854, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05695752054452896, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05657840892672539, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033655036240816116, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02904379740357399, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02893236093223095, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026461979374289513, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026218583807349205, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01753268949687481, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01714707724750042, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01698211394250393, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010783759877085686, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01753268949687481, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010783759877085686, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.27738162875175476, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.24691620469093323, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23567190766334534, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.20730946958065033, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1290779560804367, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11697032302618027, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1510971337556839, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1387544721364975, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13338039815425873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10956623405218124, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10224546492099762, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07764240354299545, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06698936969041824, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06256916373968124, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06150693818926811, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0390511117875576, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03342672064900398, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03322101756930351, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02969127893447876, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02901323139667511, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.021631799638271332, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.022309521213173866, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.02022712305188179, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.016477802768349648, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.016477802768349648, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.016477802768349648, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1126021072268486, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10418619215488434, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10076642036437988, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09025099873542786, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05246558412909508, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04909053444862366, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06028442457318306, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05556539446115494, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05338249355554581, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04637845978140831, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04378194361925125, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030678102746605873, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026597825810313225, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025197679176926613, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024859141558408737, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015326516702771187, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013111770153045654, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01297223474830389, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011898553930222988, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011685485951602459, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008107985369861126, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008247883059084415, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0076286508701741695, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0056390827521681786, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015326516702771187, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0056390827521681786, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09304530173540115, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08609581738710403, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08294516056776047, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07429222762584686, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.043259065598249435, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04031651094555855, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.050245460122823715, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.046382639557123184, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.044035207480192184, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03827003762125969, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03617167845368385, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.025478914380073547, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.022151416167616844, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0207549799233675, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.020412979647517204, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012729327194392681, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010751194320619106, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010601500049233437, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009756265208125114, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009539641439914703, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006697158794850111, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0067452131770551205, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006248102523386478, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0045025041326880455, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012729327194392681, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0045025041326880455, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22617046535015106, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21009691059589386, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20429150760173798, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18339188396930695, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10602118074893951, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09985747188329697, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12033170461654663, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11077729612588882, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10777155309915543, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09399310499429703, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08856289088726044, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06119513139128685, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.052904244512319565, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05071574077010155, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05019611120223999, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03051632083952427, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025904227048158646, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02570594847202301, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023426294326782227, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023084236308932304, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015811186283826828, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015484570525586605, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015009008347988129, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009735164232552052, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015811186283826828, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009735164232552052, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24975784122943878, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21958068013191223, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2082032412290573, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17630833387374878, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11621695011854172, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10439988225698471, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13809695839881897, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1262359470129013, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12010251730680466, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09618545323610306, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08677028864622116, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07138200104236603, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06187030300498009, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.057324957102537155, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05623622611165047, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03623507171869278, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03189248964190483, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.031528081744909286, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02826620079576969, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02757585421204567, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.020815685391426086, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.022851182147860527, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.019491901621222496, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01829131320118904, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01829131320118904, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01829131320118904, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17585046589374542, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1643431931734085, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1605638712644577, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14511820673942566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08301510661840439, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07879634946584702, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0928221121430397, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08571838587522507, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08427765220403671, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0742073655128479, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07024748623371124, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04749012365937233, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0411965548992157, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03997817263007164, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03968938812613487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02372211031615734, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020977124571800232, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020891224965453148, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01924472488462925, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01906765066087246, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012793436646461487, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013296066783368587, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012403625063598156, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00951584056019783, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012793436646461487, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00951584056019783, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24850766360759735, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23249046504497528, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2274080365896225, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20550887286663055, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11700401455163956, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11123911291360855, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1306615173816681, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1205458790063858, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11879058182239532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10462488234043121, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0989510715007782, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06638391315937042, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05756319686770439, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05597655847668648, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.055596064776182175, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033111535012722015, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028575604781508446, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028466686606407166, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0260233785957098, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025782495737075806, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01731620542705059, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016928773373365402, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016775133088231087, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010710244067013264, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01731620542705059, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010710244067013264, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.28079167008399963, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2505444288253784, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2398737519979477, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21062961220741272, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13070151209831238, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11897080391645432, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15152673423290253, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.13955725729465485, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13490687310695648, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11106802523136139, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10333150625228882, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07778770476579666, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.067325159907341, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06330922991037369, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.062324799597263336, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03908032923936844, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03367841616272926, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.033518627285957336, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.029899155721068382, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.029282284900546074, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.021520890295505524, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.02225968800485134, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.02028779685497284, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.0163345355540514, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.0163345355540514, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.0163345355540514, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11191409826278687, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10363497585058212, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10009640455245972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08974683284759521, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05216338112950325, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04875875636935234, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06006498262286186, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.055451054126024246, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05307606980204582, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04614945873618126, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.043570730835199356, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030555903911590576, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026512322947382927, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025031227618455887, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024676037952303886, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015259191393852234, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012972294352948666, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012825578451156616, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01177025493234396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01154375821352005, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008029047399759293, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008110635913908482, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0075255706906318665, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005448673851788044, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015259191393852234, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005448673851788044, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09379806369543076, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08691678196191788, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08370905369520187, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.075028195977211, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04368581622838974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.040733855217695236, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05077258497476578, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.046905767172575, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.044452816247940063, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.038699451833963394, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0365852527320385, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02576557733118534, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.022386295720934868, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.020958533510565758, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.020605724304914474, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012865982949733734, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010835903696715832, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010679968632757664, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009830123744904995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009605364874005318, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006759779993444681, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006767891347408295, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006297629326581955, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004474242217838764, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012865982949733734, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004474242217838764, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2279280722141266, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21158026158809662, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20568251609802246, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1845354288816452, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10681755840778351, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10054311901330948, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12162132561206818, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11171918362379074, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10861861705780029, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09462908655405045, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08918802440166473, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06180651858448982, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.053351886570453644, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.051118772476911545, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05057339742779732, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030832646414637566, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026131806895136833, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025933776050806046, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023630214855074883, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02327537164092064, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015973424538969994, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015658240765333176, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015131472609937191, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009884588420391083, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015973424538969994, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009884588420391083, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2453300952911377, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2171897441148758, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20472660660743713, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1741873323917389, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11417341232299805, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.102403424680233, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13880424201488495, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12695026397705078, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11870485544204712, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09554626792669296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0868326723575592, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07158491015434265, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0618964247405529, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.056006014347076416, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0545654296875, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0360025130212307, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030757615342736244, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03023613430559635, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027340684086084366, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026431409642100334, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01997252181172371, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02181912027299404, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018174463883042336, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016912033781409264, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018174463883042336, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016912033781409264, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1746412217617035, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16314683854579926, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1593041718006134, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14394111931324005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08246880024671555, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07821612060070038, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09233192354440689, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08529288321733475, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08375343680381775, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07367666810750961, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06972452253103256, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04724868759512901, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04101615771651268, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03974610194563866, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.039441440254449844, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02359057031571865, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02090098150074482, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020809954032301903, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019180193543434143, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.018994208425283432, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012705511413514614, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013325382955372334, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012290983460843563, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009611147455871105, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012705511413514614, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009611147455871105, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24561528861522675, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22952115535736084, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22439180314540863, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20262859761714935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1156763806939125, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10984418541193008, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12931464612483978, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.119281105697155, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11743326485157013, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1033313199877739, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09762918949127197, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0656973123550415, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05696796998381615, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05533784627914429, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.054951269179582596, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.032730307430028915, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028273409232497215, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028159162029623985, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025729302316904068, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025481892749667168, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01703629456460476, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01679529808461666, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016466505825519562, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010678119026124477, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01703629456460476, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010678119026124477, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2822745740413666, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2522161900997162, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.24144625663757324, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.2119629681110382, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13162082433700562, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11984559148550034, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1527150273323059, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14064806699752808, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13583555817604065, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11191709339618683, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10407321900129318, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07859304547309875, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06787008792161942, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06371768563985825, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06271078437566757, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03952543064951897, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.033823203295469284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.033653624355793, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.030000636354088783, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.029368752613663673, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02182867005467415, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.022256961092352867, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.020556673407554626, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.0162177886813879, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.0162177886813879, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.0162177886813879, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11811293661594391, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10937514156103134, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10579933971166611, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09474825114011765, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05509894713759422, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05159047991037369, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06328978389501572, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05839037895202637, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05609072372317314, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.048719372600317, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0459381528198719, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0321534164249897, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027891753241419792, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02642245963215828, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026060355827212334, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01605207473039627, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013629145920276642, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013481776230037212, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012346356175839901, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012117130681872368, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00841290783137083, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0084201879799366, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007908898405730724, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0055482517927885056, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01605207473039627, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0055482517927885056, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09818381816148758, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09099984169006348, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08769767731428146, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07859190553426743, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04576468840241432, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04267393425107002, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.053086526691913605, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.049069471657276154, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04656589776277542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0405392050743103, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03830153867602348, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02691463567316532, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02343311719596386, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02194548211991787, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021580563858151436, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013430214487016201, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011322827078402042, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01116647757589817, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010268432088196278, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010037067346274853, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0070444741286337376, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007041266653686762, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006574932485818863, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004616796504706144, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013430214487016201, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004616796504706144, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23667727410793304, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2197328358888626, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21349456906318665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19142262637615204, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11108174175024033, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10443321615457535, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12654979526996613, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11632958799600601, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11296550184488297, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09835993498563766, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09276846796274185, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06441378593444824, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.055650681257247925, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05318744108080864, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05259214714169502, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03216468170285225, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02726379595696926, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027037231251597404, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024661777541041374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024282826110720634, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016766078770160675, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016467629000544548, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01588333211839199, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010545453056693077, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016766078770160675, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010545453056693077, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25537291169166565, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22670333087444305, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21609865128993988, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18505585193634033, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11951933056116104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10812836140394211, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14047007262706757, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12848499417304993, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1231057420372963, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09934244304895401, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09121984243392944, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07228250056505203, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06255785375833511, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.058493323624134064, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05752481147646904, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0364593081176281, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03206837922334671, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03174007311463356, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.028331851586699486, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02771688997745514, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.020508116111159325, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02234441041946411, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.019281936809420586, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.017492497339844704, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.017492497339844704, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.017492497339844704, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17922647297382355, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16732057929039001, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1633646935224533, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1475573480129242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0847383588552475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08036214113235474, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09485074877738953, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08763663470745087, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0860338881611824, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07564765959978104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07158445566892624, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04856729507446289, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04218307510018349, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04087991639971733, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04057362303137779, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0242572333663702, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02155047468841076, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.0214540995657444, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019778817892074585, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019586917012929916, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013086862862110138, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013810435310006142, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012664898298680782, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010031849145889282, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013086862862110138, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010031849145889282, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.248918816447258, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2325199693441391, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2273571491241455, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2052268534898758, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11733414977788925, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11141091585159302, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1312108188867569, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12100245803594589, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1191319152712822, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10474803298711777, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09898746013641357, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06674923747777939, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05778328329324722, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05612320452928543, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05572395399212837, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03325696289539337, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028616083785891533, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02849789336323738, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02601984143257141, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02576465904712677, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01733153872191906, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016915535554289818, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016741670668125153, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010627778246998787, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01733153872191906, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010627778246998787, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.28546854853630066, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2550584673881531, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.24414046108722687, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21415579319000244, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1332063525915146, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.12126912921667099, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15463322401046753, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14243778586387634, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13745060563087463, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11331824213266373, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10534428805112839, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07951118797063828, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06882583349943161, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06454090774059296, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.063514843583107, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.039950914680957794, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03441835194826126, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03423679620027542, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.030597984790802002, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02995278313755989, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.022027254104614258, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.02285217121243477, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.02072715014219284, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.016830865293741226, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.016830865293741226, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.016830865293741226, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1259414255619049, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11663039028644562, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11285275220870972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10108611732721329, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05878257751464844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05504289269447327, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06742039322853088, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06225327402353287, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05982041358947754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.052012767642736435, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04906341806054115, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03432035073637962, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029825204983353615, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028254752978682518, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027875036001205444, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017152445390820503, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014703069813549519, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014549522660672665, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013354684226214886, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01311627309769392, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009060398675501347, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009255279786884785, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008537315763533115, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006344761233776808, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017152445390820503, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006344761233776808, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10157759487628937, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0941147580742836, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09040724486112595, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08102671056985855, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04730656370520592, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.044019363820552826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05521237477660179, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05110381171107292, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04813973605632782, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.041923392564058304, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03963065892457962, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028015216812491417, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02439875341951847, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022699562832713127, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022285636514425278, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013989472761750221, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011739646084606647, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011555667966604233, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010656706988811493, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01039482094347477, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007353656925261021, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007362922187894583, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006829769350588322, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00486270897090435, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013989472761750221, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00486270897090435, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24575607478618622, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2283007949590683, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2218993753194809, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19914184510707855, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11541878432035446, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10860784351825714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13127434253692627, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12058054655790329, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11732488125562668, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10230530798435211, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09638103097677231, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06685107946395874, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.057714492082595825, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05526170879602432, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05466270446777344, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03337765112519264, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028317201882600784, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028089523315429688, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0256353672593832, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025248264893889427, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01738051138818264, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017091305926442146, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016459191218018532, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010939652100205421, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01738051138818264, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010939652100205421, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25610050559043884, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22677624225616455, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21681056916713715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18030261993408203, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11998577415943146, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10895762592554092, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13997767865657806, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12776227295398712, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12346847355365753, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09842298924922943, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08720461279153824, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07243120670318604, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06240980699658394, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05888494476675987, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.058041930198669434, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.036720748990774155, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03228330239653587, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03202727437019348, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02829449437558651, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02776755392551422, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.020904699340462685, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02241477556526661, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.019891133531928062, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.017619343474507332, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.017619343474507332, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.017619343474507332, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17657797038555145, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16494271159172058, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16104070842266083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1454402059316635, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08350833505392075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07919278740882874, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09344809502363205, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08633831888437271, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08481449633836746, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07456626743078232, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07052373886108398, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.047857433557510376, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04155832529067993, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04027911648154259, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03997243940830231, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023909753188490868, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021212762221693993, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02112380601465702, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01946054771542549, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0192707609385252, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012930638156831264, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013562950305640697, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012516291812062263, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009833121672272682, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012930638156831264, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009833121672272682, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.250085711479187, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23367951810359955, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2284947633743286, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2062792330980301, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11790405958890915, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11193565279245377, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13171865046024323, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1215222105383873, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11971590667963028, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1052415743470192, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09952003508806229, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0670301616191864, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.058029115200042725, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.056380029767751694, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05598173290491104, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03340056166052818, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028754765167832375, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02864217199385166, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026141265407204628, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025886958464980125, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01742144487798214, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016994183883070946, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016845135018229485, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.0106740677729249, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01742144487798214, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.0106740677729249, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.28353163599967957, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.25302812457084656, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.24213485419750214, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21280017495155334, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13206183910369873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.12014726549386978, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15407422184944153, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.141274556517601, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13636144995689392, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11222551017999649, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.1045752763748169, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07883349806070328, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06801088899374008, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0638289824128151, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06281406432390213, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03952554985880852, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03374472260475159, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03356880694627762, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.029885414987802505, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.029231412336230278, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02155989408493042, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.022043423727154732, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.020212015137076378, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01584537886083126, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01584537886083126, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01584537886083126, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12876074016094208, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11945432424545288, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11567993462085724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10375570505857468, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06009912118315697, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0563991479575634, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0688849538564682, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06355967372655869, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06114375218749046, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05329526588320732, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05033246800303459, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03502571955323219, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030379971489310265, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0288244541734457, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02844598889350891, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017478665336966515, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01488188374787569, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014730616472661495, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013504534028470516, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013268197886645794, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009148276410996914, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009191925637423992, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008613650687038898, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006083215586841106, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017478665336966515, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006083215586841106, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10275737196207047, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09536544978618622, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09181422740221024, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0824342668056488, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04789477214217186, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04465825483202934, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0557025782763958, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05154411122202873, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04871847480535507, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.042506273835897446, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04019324854016304, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028218476101756096, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02459624782204628, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022956183180212975, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02256029099225998, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01408330537378788, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011831602081656456, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011652939021587372, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010745215229690075, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010493333451449871, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007366673555225134, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007352292537689209, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00685163913294673, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004779329057782888, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01408330537378788, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004779329057782888, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24831321835517883, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2309902161359787, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22443075478076935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20164315402507782, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11673291772603989, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10983125120401382, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13324354588985443, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12245321273803711, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1186598688364029, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10369515419006348, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09791351109743118, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06784095615148544, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05867026373744011, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.055917877703905106, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.055236876010894775, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03387754037976265, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02859112061560154, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028323782607913017, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025913918390870094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025485539808869362, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017622269690036774, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017206383869051933, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01663910411298275, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010858501307666302, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017622269690036774, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010858501307666302, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.26064491271972656, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23669683933258057, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2285408228635788, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19977618753910065, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12234165519475937, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11311250925064087, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14014245569705963, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12851367890834808, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12517957389354706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1045854315161705, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09668804705142975, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07241443544626236, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0626852735877037, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05991149693727493, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05926775559782982, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03673849627375603, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.032705243676900864, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03250271826982498, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.029374578967690468, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.028973251581192017, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.020942477509379387, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.022438907995820045, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.020154818892478943, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01760943792760372, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01760943792760372, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01760943792760372, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1811017096042633, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1691197156906128, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16513904929161072, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1491287797689438, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08556455373764038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08113743364810944, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09568528085947037, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08847403526306152, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08687848597764969, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07637561112642288, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07222238928079605, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.048942651599645615, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04250677675008774, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.041182175278663635, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.040874384343624115, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024415088817477226, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02156461402773857, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021470196545124054, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01975756324827671, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01956932246685028, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013073544017970562, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0136237358674407, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012637312524020672, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009686100296676159, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013073544017970562, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009686100296676159, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.25004321336746216, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2337098866701126, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22848165035247803, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2063767910003662, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1179773360490799, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11201278120279312, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1317029446363449, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12160401791334152, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11973967403173447, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10534526407718658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09949608892202377, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06698888540267944, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05808931589126587, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05642426759004593, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05602648854255676, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03335130587220192, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028775060549378395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028659826144576073, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026168731972575188, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02591625228524208, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017327705398201942, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017014166340231895, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016749929636716843, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010708769783377647, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017327705398201942, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010708769783377647, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.28541821241378784, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2539176642894745, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.24251578748226166, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21282993257045746, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1327347308397293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.12032031267881393, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15480230748653412, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14232943952083588, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13719426095485687, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11237066239118576, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.1046353206038475, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0792931318283081, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06840583682060242, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06404662132263184, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06298449635505676, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03979704901576042, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03363925591111183, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03345628082752228, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02963409759104252, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.028950095176696777, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.021647289395332336, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.02172071300446987, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.02026316337287426, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01527427788823843, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01527427788823843, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01527427788823843, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12328385561704636, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11450307816267014, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11087025701999664, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09956523776054382, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05767371878027916, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.054096221923828125, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06611510366201401, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06100670248270035, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05861986428499222, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05117424950003624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04836799204349518, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03363113850355148, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02918413281440735, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02765296958386898, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027288449928164482, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01680251583456993, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014266383834183216, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01411430537700653, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012958000414073467, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01272149570286274, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008814449422061443, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00880036037415266, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008303186856210232, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00579933961853385, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01680251583456993, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00579933961853385, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.101994588971138, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09469671547412872, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09121311455965042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08183898776769638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04755737632513046, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.044372767210006714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.055248159915208817, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05114947631955147, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0483776330947876, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04224657267332077, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03993551805615425, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027988329529762268, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024425910785794258, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022811954841017723, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022415824234485626, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013973806984722614, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011770572513341904, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01159537024796009, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010696737095713615, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010449016466736794, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007321897428482771, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007332697045058012, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006819122936576605, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004800933878868818, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013973806984722614, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004800933878868818, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.253558486700058, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23609896004199982, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22967024147510529, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20655658841133118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11929319053888321, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11244217306375504, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13579584658145905, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12470006942749023, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12120119482278824, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10607948154211044, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10014056414365768, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06915803253650665, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05969075486063957, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0571284294128418, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.056498076766729355, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03450770303606987, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029233956709504128, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028986681252717972, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02651580609381199, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026111407205462456, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017967544496059418, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01757943443953991, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017011526972055435, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011144149117171764, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017967544496059418, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011144149117171764, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2509315311908722, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22563235461711884, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21453869342803955, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18684132397174835, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1169113740324974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1063164621591568, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14056091010570526, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12859000265598297, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12102673202753067, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1005253940820694, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09278403222560883, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07238481193780899, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06222943216562271, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05679968744516373, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05545244365930557, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03640062361955643, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03027241863310337, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029781023040413857, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027097761631011963, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026265686377882957, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01981893554329872, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02037755958735943, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01805906370282173, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014823480509221554, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01805906370282173, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014823480509221554, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17963288724422455, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16774936020374298, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16388224065303802, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14800944924354553, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08492238819599152, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08055751770734787, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09492117911577225, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08775656670331955, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08623591810464859, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0758395716547966, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07171536982059479, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04853610321879387, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04220274090766907, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04092646390199661, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0406220518052578, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024248598143458366, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021507754921913147, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02141783945262432, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019718853756785393, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0195330660790205, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013046381063759327, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013680166564881802, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012630842626094818, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00984266959130764, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013046381063759327, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00984266959130764, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2526124119758606, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2359396517276764, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23070533573627472, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20845286548137665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.119159996509552, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11314789205789566, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1330592930316925, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12277662009000778, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12096474319696426, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10640483349561691, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10052531957626343, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06765561550855637, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0586409829556942, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.057005953043699265, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05661342293024063, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03373126685619354, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02910330891609192, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028986109420657158, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02647385559976101, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026220807805657387, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01762906275689602, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017237534746527672, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01706823520362377, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010908310301601887, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01762906275689602, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010908310301601887, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.28461965918540955, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2528327703475952, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.24113713204860687, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21152590215206146, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13225483894348145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11967843770980835, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1548391431570053, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.142339825630188, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13693921267986298, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.1118677631020546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10432586818933487, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0794747993350029, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06847094744443893, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06384678930044174, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06273560971021652, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.039909131824970245, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.033604830503463745, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.033397331833839417, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.029603445902466774, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02886163629591465, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02179444395005703, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.021818703040480614, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.020323077216744423, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015375367365777493, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015375367365777493, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015375367365777493, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12927445769309998, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11991439014673233, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11607532948255539, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10415714234113693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06057201698422432, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0568072609603405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0695333182811737, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06399419903755188, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.061623465269804, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05364097282290459, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05073295906186104, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.035403840243816376, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03061179257929325, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02906579151749611, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028697436675429344, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017608238384127617, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015057913027703762, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014903480187058449, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013672716915607452, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013435889966785908, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00931716337800026, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009416828863322735, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008804949931800365, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006361983250826597, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017608238384127617, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006361983250826597, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10576820373535156, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09809422492980957, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09443362802267075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08468732237815857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04940526932477951, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.046064186841249466, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.057352643460035324, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05306002497673035, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05027272179722786, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04376737028360367, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.041359540075063705, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0291118323802948, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.025325706228613853, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023669028654694557, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0232741367071867, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014470905065536499, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012200961820781231, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012023924849927425, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011071139015257359, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010814971290528774, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007636188063770533, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007617224007844925, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007127632852643728, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004983318038284779, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014470905065536499, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004983318038284779, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.26241734623908997, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24412699043750763, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23752836883068085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21352119743824005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12376624345779419, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11665989458560944, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14083296060562134, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12921008467674255, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12580782175064087, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10985594987869263, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.1038234755396843, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07171987742185593, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06176787242293358, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05921841785311699, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.058604687452316284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.035638656467199326, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03022727742791176, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029989738017320633, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02737264893949032, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026964962482452393, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01856936141848564, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018130550161004066, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01761331968009472, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01141803152859211, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018130550161004066, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01141803152859211, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25769880414009094, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22902266681194305, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21922115981578827, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18624331057071686, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12040722370147705, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10950793325901031, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13919536769390106, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12726518511772156, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1238042488694191, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09877196699380875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0904339998960495, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07189522683620453, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.061937976628541946, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05884105712175369, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0580812469124794, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03629758581519127, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.031957950443029404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.031753651797771454, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02794530987739563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02746245265007019, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.020390748977661133, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.021879538893699646, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01947057992219925, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016986582428216934, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016986582428216934, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016986582428216934, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18445059657096863, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17205356061458588, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1680026799440384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15161287784576416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08729023486375809, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08273609727621078, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09789785742759705, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09023072570562363, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0886584222316742, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07778974622488022, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07371809333562851, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05011821538209915, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0432865247130394, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04195202514529228, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04164228215813637, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02476123347878456, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02193766087293625, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02184414118528366, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02009112574160099, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019891249015927315, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01342049241065979, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013946929946541786, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012988582253456116, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009924615733325481, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01342049241065979, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009924615733325481, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.25221726298332214, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23554515838623047, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23025749623775482, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20786814391613007, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11917338520288467, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11312282085418701, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1335509866476059, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12285623699426651, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12101775407791138, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1062588021159172, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10063111037015915, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06790172308683395, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.058560561388731, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05689820274710655, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05651267245411873, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033545177429914474, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028968051075935364, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028850583359599113, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02634013630449772, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026083439588546753, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017759254202246666, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017256639897823334, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017189670354127884, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010875297710299492, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017759254202246666, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010875297710299492, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.284511923789978, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.25138065218925476, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23897568881511688, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.20935137569904327, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.132163405418396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11901267617940903, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1560303270816803, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.142924502491951, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13708850741386414, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11121606081724167, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10396157950162888, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.08014773577451706, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06855928152799606, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06365000456571579, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.062484171241521835, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.039806146174669266, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03342268988490105, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.033181916922330856, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.029382551088929176, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02857615053653717, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02204936556518078, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.021836206316947937, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.020460618659853935, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015283188782632351, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015283188782632351, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015283188782632351, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13255304098129272, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12308349460363388, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11919106543064117, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10691195726394653, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06210865452885628, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05826590582728386, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07133492082357407, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06566675007343292, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06314990669488907, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.055068064481019974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.052159011363983154, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03633750230073929, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03139495104551315, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.029761720448732376, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029374150559306145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018056664615869522, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015323718078434467, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015159781090915203, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01390809565782547, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013658100739121437, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009505107998847961, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009465692564845085, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008963831700384617, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006211775820702314, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018056664615869522, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006211775820702314, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10883939266204834, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10093306750059128, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0971442386507988, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08715636283159256, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05088194087147713, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04743640124797821, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05919218063354492, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05471356585621834, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.051767971366643906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04511130973696709, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04271700605750084, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03004920296370983, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026126986369490623, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.024384601041674614, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02396579273045063, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014947501011192799, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01258602924644947, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012397888116538525, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011434325948357582, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011164355091750622, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007892454043030739, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00788032915443182, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0073599861934781075, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005185477435588837, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014947501011192799, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005185477435588837, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.26048704981803894, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24255338311195374, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2361469268798828, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21235568821430206, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12277442216873169, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1159345880150795, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.139437735080719, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12788359820842743, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12479560822248459, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10906213521957397, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10313262045383453, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07100950926542282, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.061098452657461166, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05876636505126953, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05820087715983391, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.035291220992803574, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029981037601828575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029781524091959, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02716633677482605, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02680419385433197, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018380558118224144, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017933735623955727, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017483234405517578, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011304466985166073, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017933735623955727, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011304466985166073, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2552125155925751, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2292826622724533, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.220650315284729, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18500201404094696, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12048880010843277, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.110844187438488, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13840356469154358, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12658248841762543, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12311705201864243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0996016189455986, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08860892057418823, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07144790142774582, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06149012967944145, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05874195322394371, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.058084964752197266, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.035861607640981674, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03162362799048424, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03141811117529869, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02768580988049507, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027274463325738907, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.020014585927128792, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02128908596932888, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.019184641540050507, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016278328374028206, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016278328374028206, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016278328374028206, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18763069808483124, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1751788854598999, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17110224068164825, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15450643002986908, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08883869647979736, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08425243198871613, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09951417148113251, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09173370897769928, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09024929255247116, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07921921461820602, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07498999685049057, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0508795790374279, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04395131394267082, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04263685643672943, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04233712702989578, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025165900588035583, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022190270945429802, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022096315398812294, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02029992826282978, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020103512331843376, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013624045066535473, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013937363401055336, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013198102824389935, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009745477698743343, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013624045066535473, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009745477698743343, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2566019296646118, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23972323536872864, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23434869945049286, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2116260975599289, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12127730995416641, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11516707390546799, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13599561154842377, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12492126226425171, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12311537563800812, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10817310959100723, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10244156420230865, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06909489631652832, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05957130342721939, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05791231989860535, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05752911791205406, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03416260704398155, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029510920867323875, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.029394758865237236, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02684677019715309, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026594240218400955, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.018204888328909874, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01761000230908394, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01765253022313118, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01115886215120554, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.018204888328909874, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01115886215120554, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2852596938610077, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2520385682582855, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23948486149311066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.20999391376972198, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13250480592250824, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.1191483736038208, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15643900632858276, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14350071549415588, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13742825388908386, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11157573759555817, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10443331301212311, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.08029384911060333, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06903313100337982, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06393980234861374, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0627124235033989, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03984280675649643, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03375229611992836, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03348956257104874, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.029735177755355835, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.028906529769301414, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.021791644394397736, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.022283406928181648, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.02007809840142727, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01585070975124836, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01585070975124836, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01585070975124836, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13412165641784668, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12469493597745895, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12077305465936661, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10861251503229141, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06297432631254196, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.059157539159059525, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07213888317346573, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06647656857967377, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06401517242193222, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05592518672347069, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05299125984311104, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03676852583885193, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.031802963465452194, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03019542060792446, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02980903349816799, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018289338797330856, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01557710487395525, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015414521098136902, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014163263142108917, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013913094997406006, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009665763936936855, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009648258797824383, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009144037030637264, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00639503076672554, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018289338797330856, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00639503076672554, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11309415102005005, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10507255792617798, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10139451175928116, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09115778654813766, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.052975594997406006, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04955647140741348, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06129912659525871, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05660659819841385, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05388842150568962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.047080982476472855, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04456483945250511, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031117429956793785, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027029696851968765, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025385672226548195, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024988163262605667, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015479733236134052, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013047832064330578, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012875919230282307, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011857117526233196, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011602410115301609, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008134630508720875, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008070598356425762, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007622480392456055, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005233916454017162, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015479733236134052, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005233916454017162, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.26503437757492065, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24692438542842865, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.24072594940662384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2169537991285324, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12502636015415192, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11825398355722427, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14142560958862305, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12977243959903717, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12702015042304993, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11118985712528229, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.1052054688334465, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07193659991025925, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.061972726136446, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0597788505256176, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05925891548395157, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03575896844267845, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030391106382012367, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03021296299993992, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027535824105143547, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027188003063201904, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01851549558341503, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017983991652727127, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017673756927251816, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011099363677203655, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017983991652727127, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011099363677203655, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.27246949076652527, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2426818609237671, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23295234143733978, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19318504631519318, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12865369021892548, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11753851175308228, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14682145416736603, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13489912450313568, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.131715789437294, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.104555144906044, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0942203626036644, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07609981298446655, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06569946557283401, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06290610134601593, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.06224548816680908, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03839060664176941, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0341256819665432, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03393840789794922, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.029619360342621803, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02919515036046505, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.021828703582286835, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.023238690569996834, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.020989542827010155, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.018063105642795563, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.018063105642795563, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.018063105642795563, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1950685977935791, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18210183084011078, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1779620349407196, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16076253354549408, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09239297360181808, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08766771852970123, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1032644733786583, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09531120955944061, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09381910413503647, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08239374309778214, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0779813751578331, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.052829451858997345, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04562830552458763, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04429752752184868, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04399475082755089, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026054907590150833, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022982198745012283, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022884996607899666, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021013503894209862, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020815743133425713, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013999936170876026, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01435756217688322, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013564850203692913, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009942560456693172, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013999936170876026, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009942560456693172, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2635906934738159, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24629543721675873, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.24092546105384827, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21758270263671875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12468301504850388, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1184229776263237, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13939368724822998, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12839244306087494, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1266237497329712, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11127954721450806, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10532519966363907, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0708872601389885, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06120605394244194, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05951705202460289, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05912650376558304, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.034952834248542786, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.030289476737380028, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.030174702405929565, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02756015956401825, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.027291903272271156, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01840115524828434, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.018007708713412285, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017822926864027977, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011310513131320477, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.018007708713412285, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011310513131320477, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.28741714358329773, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.25326797366142273, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.24007344245910645, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21078014373779297, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13335958123207092, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11952079832553864, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1580406278371811, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1451716423034668, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13855771720409393, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11218710243701935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10504855215549469, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.08090976625680923, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06965207308530807, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06427876651287079, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06298080831766129, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.04000962898135185, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03376063331961632, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03348616138100624, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.0296735018491745, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.028802597895264626, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02170458249747753, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.02214326523244381, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.019935065880417824, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015500277280807495, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015500277280807495, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015500277280807495, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1335650533437729, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12401022017002106, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12007486075162888, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10790518671274185, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06267500668764114, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05880654975771904, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07206328958272934, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06629927456378937, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0637262687087059, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05562131479382515, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.052749473601579666, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03675209730863571, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.031711868941783905, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030069619417190552, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029673049226403236, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018285905942320824, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015547302551567554, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015387809835374355, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014141722582280636, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013893901370465755, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0096850311383605, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009695063345134258, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009143401868641376, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0064925625920295715, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018285905942320824, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0064925625920295715, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11365652084350586, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10549147427082062, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1015968918800354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09123524278402328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05320300534367561, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04964384064078331, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0618588887155056, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05715828761458397, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05411580204963684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04721272364258766, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04471411183476448, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0313880480825901, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027283350005745888, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025498634204268456, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025075331330299377, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015614643692970276, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013144646771252155, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012951540760695934, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011936157010495663, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011660776101052761, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008223139680922031, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008198671042919159, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007680103182792664, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005366755183786154, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015614643692970276, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005366755183786154, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.26635321974754333, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24799279868602753, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.24171118438243866, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21762454509735107, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12567582726478577, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11872263997793198, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14240579307079315, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13067984580993652, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1276816576719284, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11169008165597916, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10563527792692184, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07255010306835175, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06240520626306534, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06011435016989708, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0595749132335186, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03601211681962013, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030608782544732094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03041522018611431, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027726946398615837, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027373379096388817, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01867407001554966, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018181314691901207, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017797475680708885, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01133138220757246, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018181314691901207, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01133138220757246, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25762519240379333, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22870533168315887, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21942798793315887, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18807446956634521, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12073314189910889, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1098228469491005, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13979099690914154, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12757818400859833, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.124208964407444, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1003752276301384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09253139793872833, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07248993963003159, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06215719133615494, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05907420441508293, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.058339472860097885, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03666903078556061, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03220595419406891, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03200305625796318, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02844483032822609, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0279722660779953, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.021052183583378792, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.022171467542648315, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.02013460360467434, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.017328711226582527, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.017328711226582527, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.017328711226582527, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20063979923725128, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18744443356990814, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18321006000041962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16549114882946014, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09505195170640945, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09021774679422379, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10639601200819016, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09800642728805542, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09652941673994064, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08481394499540329, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0804290696978569, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05441191792488098, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04695265740156174, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04560841992497444, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04530361294746399, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026903346180915833, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023745408281683922, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023653823882341385, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021741000935435295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02153819240629673, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014617419801652431, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014933198690414429, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014181891456246376, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010473789647221565, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014617419801652431, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010473789647221565, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2650378346443176, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24779723584651947, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.24233241379261017, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21899418532848358, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12540994584560394, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11914797127246857, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.14023195207118988, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12911921739578247, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1273421198129654, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11198277771472931, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10603659600019455, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0713455080986023, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06158716231584549, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05990731716156006, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.059511877596378326, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03524209186434746, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.030521372333168983, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.030405128374695778, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.027786264196038246, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.027527248486876488, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.0186911728233099, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.018198879435658455, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.018127260729670525, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011520273052155972, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.018198879435658455, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011520273052155972, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.288394033908844, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2530767619609833, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23923516273498535, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.2101200670003891, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13353806734085083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.1192476898431778, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1588929146528244, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14596621692180634, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1389668732881546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11198794841766357, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10501333326101303, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0813361406326294, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06983265280723572, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06424681842327118, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06288819760084152, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.04016795754432678, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.033523011952638626, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.033228304237127304, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.029319841414690018, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02839686907827854, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02173922024667263, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.021750934422016144, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01989426277577877, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014841294847428799, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014841294847428799, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014841294847428799, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12984174489974976, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12077819555997849, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11699102073907852, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10535859316587448, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06078679487109184, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05707110837101936, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0697348415851593, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0643640086054802, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.061825137585401535, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0541253499686718, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.051259737461805344, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03544977679848671, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030764533206820488, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0291422288864851, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02875458635389805, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017704471945762634, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015001191757619381, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014840218238532543, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01364404708147049, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013396679423749447, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009282905608415604, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009208180010318756, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008753792382776737, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005999742075800896, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017704471945762634, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005999742075800896, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1111295223236084, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1032661572098732, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09954208880662918, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0895320400595665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05194258317351341, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04852709919214249, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.060269441455602646, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05572105199098587, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.052827365696430206, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04620499536395073, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.043758075684309006, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030590737238526344, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026639247313141823, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02491484023630619, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02450212836265564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01527097076177597, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012852589599788189, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012665371410548687, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011689930222928524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011424063704907894, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008008643984794617, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0079896105453372, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007473004516214132, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0052176970057189465, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01527097076177597, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0052176970057189465, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2598296105861664, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24220366775989532, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23581965267658234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21247684955596924, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12216613441705704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11545310169458389, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13835357129573822, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12730993330478668, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12415122985839844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10881521552801132, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.1029248982667923, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07037860155105591, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0608048215508461, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05846373364329338, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.057910408824682236, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03507426381111145, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02981065772473812, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02961006760597229, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027050992473959923, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02669282630085945, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01814192906022072, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017700670287013054, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017271338030695915, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011033188551664352, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01814192906022072, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011033188551664352, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2511571943759918, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22059957683086395, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2095792144536972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1749815195798874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11658574640750885, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10541597753763199, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1377301663160324, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12628448009490967, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12124069035053253, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0954299196600914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08727167546749115, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07140236347913742, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.061387717723846436, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05701151117682457, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.055947959423065186, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.036316629499197006, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03108219802379608, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.030781863257288933, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027160391211509705, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026476427912712097, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.02088768780231476, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.021567776799201965, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01957707665860653, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016683870926499367, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016683870926499367, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016683870926499367, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1951950341463089, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18254876136779785, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17847506701946259, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16133885085582733, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09222471714019775, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08760455995798111, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10290596634149551, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09504979103803635, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09362421184778214, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08244630694389343, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07800480723381042, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05251612886786461, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04559018835425377, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04430435225367546, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04400533810257912, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026243600994348526, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02304314449429512, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02295145019888878, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02108946442604065, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02089841663837433, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014022625982761383, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01429557241499424, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013606163673102856, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009899926371872425, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014022625982761383, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009899926371872425, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.259356826543808, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24270133674144745, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23744118213653564, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21476951241493225, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12238515913486481, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11636408418416977, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1364537477493286, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12595687806606293, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12423095852136612, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10949474573135376, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10363175719976425, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0693623423576355, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06019069626927376, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.058577656745910645, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05818967893719673, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03457627817988396, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029918989166617393, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02981259860098362, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.027262596413493156, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02701312117278576, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.018048686906695366, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017737342044711113, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017506154254078865, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01127182599157095, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.018048686906695366, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01127182599157095, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2839437425136566, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2492438107728958, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23569796979427338, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.20770740509033203, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13141801953315735, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.1172822117805481, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15685322880744934, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14378906786441803, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13671015202999115, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11070559173822403, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10376352071762085, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.08034136891365051, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06928610056638718, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0636681318283081, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06227788329124451, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.04029271751642227, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.033817362040281296, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.0335172675549984, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.029805300757288933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.028909383341670036, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.021939288824796677, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.022510912269353867, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.02011304907500744, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.016206884756684303, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.016206884756684303, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.016206884756684303, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12869511544704437, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11972560733556747, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11597424000501633, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10438738018274307, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06030662730336189, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.056655775755643845, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06916055828332901, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06379365175962448, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.061275094747543335, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0536767914891243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05092219263315201, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.035217415541410446, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030513698235154152, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02893347106873989, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028549669310450554, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017602408304810524, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014921761117875576, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014766541309654713, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013582161627709866, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013341802172362804, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009271176531910896, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009200021624565125, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00876086950302124, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0060709440149366856, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017602408304810524, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0060709440149366856, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10875160992145538, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10103758424520493, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09724932909011841, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08744306862354279, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05077750235795975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04736854508519173, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05920108035206795, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05478708818554878, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.051629383116960526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04522080346941948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.042873039841651917, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03004310466349125, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026172522455453873, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.024366607889533043, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023931972682476044, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015021748840808868, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012606844305992126, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012406579218804836, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011486350558698177, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011210585944354534, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007926687598228455, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007910159416496754, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007371143437922001, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005228668451309204, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015021748840808868, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005228668451309204, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2676135003566742, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24955344200134277, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2434694617986679, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21948519349098206, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12600819766521454, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11924856156110764, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14209727942943573, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13071171939373016, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1279754787683487, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11229696124792099, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10618456453084946, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07232063263654709, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06243747100234032, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.060244329273700714, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0597289577126503, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03603703901171684, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030668888241052628, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.030490798875689507, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027826813980937004, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0274907648563385, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01859847642481327, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01808919571340084, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017768125981092453, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011177475564181805, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01808919571340084, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011177475564181805, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22743602097034454, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19833199679851532, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18860836327075958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16065986454486847, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10644592344760895, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09561246633529663, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12335975468158722, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11292185634374619, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10914555937051773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08518437296152115, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07919000834226608, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06362616270780563, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.055267930030822754, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05234898254275322, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.051640063524246216, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032263197004795074, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029005110263824463, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028792565688490868, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025146612897515297, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024731231853365898, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018509993329644203, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02043790929019451, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01765919290482998, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01639622636139393, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01765919290482998, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01639622636139393, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18849040567874908, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17624644935131073, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17239026725292206, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1558818221092224, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08898146450519562, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08453791588544846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0990690216422081, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09164687246084213, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09031098335981369, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07960408180952072, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0753166675567627, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.050539739429950714, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04390217736363411, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04269000142812729, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04239865764975548, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025196129456162453, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022108571603894234, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022025464102625847, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020220106467604637, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020038528367877007, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013329281471669674, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013588926754891872, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012934169732034206, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009263796731829643, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013329281471669674, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009263796731829643, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.25083550810813904, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23476774990558624, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2297486811876297, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20789866149425507, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11832060664892197, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.112556591629982, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13163389265537262, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12170165032148361, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12009742110967636, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10594411939382553, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10018084198236465, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06698518246412277, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05812215432524681, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.056577350944280624, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.056210119277238846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033326465636491776, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028806841000914574, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02869977429509163, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026239803060889244, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026001468300819397, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017290756106376648, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01694287545979023, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01676907017827034, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010570215992629528, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017290756106376648, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010570215992629528, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.27532270550727844, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.24132303893566132, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22793184220790863, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.20093820989131927, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1269388049840927, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11319243162870407, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15175938606262207, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1393338441848755, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13217194378376007, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10694102197885513, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10038883984088898, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07763620465993881, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06695001572370529, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06135689094662666, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05997467786073685, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03888937830924988, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.032348956912755966, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.032055001705884933, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.028443310409784317, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.027548223733901978, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.021139925345778465, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.021283941343426704, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.019342979416251183, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014979361556470394, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014979361556470394, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014979361556470394, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13312415778636932, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12424272298812866, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12070529907941818, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10897745937108994, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06249780207872391, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05893740803003311, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07101920247077942, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06567113101482391, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06347591429948807, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05582943558692932, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.052851419895887375, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03607819229364395, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03136993199586868, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02994159795343876, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029594814404845238, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01802237145602703, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015342973172664642, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01520545780658722, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013975681737065315, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013755204156041145, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00941665843129158, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009282132610678673, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008942686952650547, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005943373776972294, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01802237145602703, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005943373776972294, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11695650964975357, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10909900814294815, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10575064271688461, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09542746096849442, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05485968664288521, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05162046477198601, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06285490840673447, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05804364010691643, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05574635788798332, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.048996228724718094, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.046515870839357376, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0319465734064579, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027721688151359558, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026283858343958855, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025943448767066002, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015936048701405525, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013474831357598305, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013328849337995052, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01227661408483982, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012055540457367897, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008310637436807156, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008189162239432335, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007843350060284138, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005228432361036539, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015936048701405525, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005228432361036539, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.26881614327430725, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2514636516571045, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2457348108291626, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.22197213768959045, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1268198937177658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1203530952334404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14220012724399567, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13108976185321808, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12870679795742035, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11334647238254547, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10725487768650055, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07234843820333481, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06262022256851196, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.060649048537015915, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.06017979606986046, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03604859858751297, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030857840552926064, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.030696138739585876, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.028061211109161377, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027755310758948326, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01860669068992138, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018146734684705734, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017876777797937393, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011216334067285061, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018146734684705734, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011216334067285061, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2545262575149536, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21408145129680634, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19920718669891357, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16132615506649017, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11817794293165207, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10177335888147354, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14040863513946533, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12889261543750763, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12261486053466797, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08882233500480652, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08062957972288132, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07230063527822495, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06241040304303169, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.057486601173877716, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.056292444467544556, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03646261617541313, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03093268908560276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.030532345175743103, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025430593639612198, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024611592292785645, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0202882532030344, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02103252522647381, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018794279545545578, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015735043212771416, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015735043212771416, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015735043212771416, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19927649199962616, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18642759323120117, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18233703076839447, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16489005088806152, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09407974779605865, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08944123238325119, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1047147884964943, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09686186909675598, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09547524154186249, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08415769040584564, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07963995635509491, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05343497544527054, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.046388182789087296, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04512589052319527, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04482923820614815, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02662155032157898, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0233335979282856, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023246390745043755, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02133897878229618, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021151287481188774, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014066807925701141, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01429461594671011, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013647186569869518, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009684272110462189, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014066807925701141, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009684272110462189, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.25840792059898376, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24205462634563446, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2369246780872345, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2144099771976471, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12191946804523468, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11600010842084885, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13575921952724457, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12538020312786102, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12370163202285767, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10919018089771271, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10342112928628922, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06908350437879562, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05987703427672386, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05830397829413414, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05793188884854317, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.034441959112882614, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02974223531782627, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.029638219624757767, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02711728774011135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026876261457800865, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01800907962024212, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01757408119738102, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01748821698129177, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011098315939307213, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01800907962024212, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011098315939307213, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.27454426884651184, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.24039669334888458, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22649219632148743, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.20007474720478058, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12654906511306763, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11236337572336197, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15210680663585663, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.13978073000907898, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1320532262325287, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10677559673786163, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10044053941965103, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0777578055858612, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06722145527601242, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06117343530058861, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05967685580253601, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.038907911628484726, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03224584087729454, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.031904436647892, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02836996130645275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.027400298044085503, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.0209366325289011, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.021263383328914642, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.018995270133018494, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014884027652442455, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014884027652442455, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014884027652442455, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1325652301311493, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12350084632635117, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11981002241373062, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10798535495996475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06219853088259697, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0585254542529583, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07092081755399704, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06560292840003967, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0632147341966629, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05544951558113098, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05246974900364876, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03605976328253746, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03136623278260231, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02980637364089489, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0294345710426569, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018018445000052452, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015316913835704327, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015163528732955456, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013938277959823608, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01370016485452652, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00944212730973959, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00934369582682848, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008933240547776222, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006054622586816549, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018018445000052452, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006054622586816549, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11316361278295517, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10541526228189468, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10187939554452896, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09186606854200363, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05304728075861931, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04969915375113487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.061283934861421585, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05655262991786003, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05389980599284172, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04729544743895531, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04487214609980583, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031116103753447533, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027040740475058556, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025425298139452934, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02504017949104309, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015547123737633228, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013068008236587048, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012894667685031891, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011899301782250404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011652563698589802, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00813269056379795, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008031782694160938, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007617190480232239, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005175744649022818, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015547123737633228, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005175744649022818, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.26888641715049744, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2510821223258972, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2451459914445877, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.22131679952144623, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12672856450080872, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.12007397413253784, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14250263571739197, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1311994045972824, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1287134438753128, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11308906227350235, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10704305022954941, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0725046917796135, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06266289204359055, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06062702089548111, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.060133762657642365, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.036115702241659164, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030831340700387955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03066832385957241, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.028000859543681145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027683574706315994, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01862507313489914, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01815352775156498, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017852984368801117, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011205396614968777, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01815352775156498, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011205396614968777, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23153071105480194, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19766765832901, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18697302043437958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16013257205486298, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10743405669927597, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09447179734706879, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12437053769826889, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1143796369433403, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11143969744443893, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08627620339393616, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07710479944944382, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06405355781316757, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05528503656387329, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05213354900479317, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05136534571647644, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032135263085365295, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027780691161751747, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02761065773665905, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023741012439131737, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023237138986587524, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017573870718479156, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01841743104159832, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016582580283284187, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013612992130219936, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017573870718479156, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013612992130219936, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20523515343666077, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19197149574756622, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18786738812923431, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16984440386295319, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0969100147485733, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09211741387844086, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10779418796300888, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09972703456878662, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0983559638261795, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08668651431798935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08199822157621384, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05498417839407921, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04775713011622429, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04647143930196762, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.046166982501745224, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027393853291869164, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024012060835957527, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023924505338072777, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0219547338783741, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021758463233709335, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014458263292908669, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014674047939479351, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014040551148355007, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009910762310028076, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014458263292908669, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009910762310028076, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.26061415672302246, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2439834326505661, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23885023593902588, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21615254878997803, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12298460304737091, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11699322611093521, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13671299815177917, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1264558583498001, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12478666752576828, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1101313978433609, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.1041874811053276, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06959730386734009, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.060385238379240036, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05879524350166321, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05841740593314171, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03462459146976471, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029917528852820396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.0298136156052351, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.027255674824118614, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.027017703279852867, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01796625554561615, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017567407339811325, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017433319240808487, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010928427800536156, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01796625554561615, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010928427800536156, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2786770761013031, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.24451330304145813, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23067890107631683, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.2038978785276413, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12845829129219055, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11432885378599167, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1542070358991623, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14162865281105042, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13401302695274353, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10868570953607559, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10213114321231842, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0788026973605156, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06802713125944138, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06205124408006668, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06055650860071182, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.039691999554634094, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03259679675102234, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.032254528254270554, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.028712213039398193, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.0277352724224329, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.021768007427453995, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.02135106362402439, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01989406906068325, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014783213846385479, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014783213846385479, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014783213846385479, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1341456025838852, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12503758072853088, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12130361795425415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10938085615634918, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06298402696847916, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.059229202568531036, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0721898227930069, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06648764759302139, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06398327648639679, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.056177813559770584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05326789245009422, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.036676544696092606, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03178231790661812, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03020096756517887, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029811514541506767, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01836204342544079, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01553368754684925, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015373221598565578, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014149466529488564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013906207866966724, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009647316299378872, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009499140083789825, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009115866385400295, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006173971574753523, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01553368754684925, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006173971574753523, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11717275530099869, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1091727539896965, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10570026934146881, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09526907652616501, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05496379733085632, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0515960156917572, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06311658769845963, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05832235887646675, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05584800988435745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.049012552946805954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04646913707256317, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0320650115609169, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027860287576913834, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026336345821619034, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025971654802560806, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016004929319024086, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013500995002686977, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013346766121685505, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01228634174913168, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012049894779920578, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00833605695515871, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008218573406338692, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007839142344892025, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005242386367172003, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016004929319024086, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005242386367172003, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2680140435695648, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2503691613674164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.24449990689754486, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.22074580192565918, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12636157870292664, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1197839304804802, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14205168187618256, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13084648549556732, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1283418834209442, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1128539964556694, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10676711797714233, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07225704193115234, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06251128762960434, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.060477115213871, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05998469144105911, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.036000773310661316, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030752388760447502, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.030588215216994286, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027943680062890053, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027634281665086746, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018563982099294662, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01809990219771862, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017797622829675674, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011173914186656475, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01809990219771862, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011173914186656475, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22683267295360565, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19508840143680573, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18549223244190216, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15316973626613617, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1030072495341301, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09170202910900116, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12092294543981552, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10923551768064499, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10689561814069748, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0830310508608818, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07446662336587906, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06179562211036682, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05370185896754265, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05093657970428467, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.050246257334947586, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032012272626161575, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028614381328225136, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028484642505645752, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025042224675416946, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02463303692638874, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.019062506034970284, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02053336426615715, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018282350152730942, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016780506819486618, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018282350152730942, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016780506819486618, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21322229504585266, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19954876601696014, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1952688992023468, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17652326822280884, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10072926431894302, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09577146172523499, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11219272762537003, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10368464142084122, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10226576775312424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0901230201125145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08525162190198898, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.057210177183151245, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04962824285030365, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04829397425055504, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04797075316309929, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028514439240098, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024891674518585205, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024799931794404984, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022738782688975334, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02253922075033188, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015040583908557892, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015128052793443203, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014604565687477589, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010100950486958027, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015040583908557892, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010100950486958027, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.26491865515708923, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24808338284492493, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.24283912777900696, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.219752237200737, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12513647973537445, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11903782188892365, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13928191363811493, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12867306172847748, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12697021663188934, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11202799528837204, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10605525225400925, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.07098811119794846, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0614982545375824, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05988756939768791, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.059499453753232956, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.035396747291088104, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.03058263100683689, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.030470822006464005, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02788119576871395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.027636218816041946, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.018557850271463394, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.018126308917999268, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.0180282611399889, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01152102928608656, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.018126308917999268, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01152102928608656, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2795259356498718, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2464728206396103, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23298971354961395, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.20600906014442444, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12897181510925293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11532142013311386, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15422552824020386, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14212378859519958, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13433532416820526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10964953899383545, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10297482460737228, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07889736443758011, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0681750699877739, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06216084584593773, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.060671817511320114, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.039511341601610184, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03238063305616379, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03203548118472099, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02852100133895874, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02754846028983593, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.021226074546575546, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.020848609507083893, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.019305629655718803, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013992831110954285, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013992831110954285, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013992831110954285, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13822396099567413, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12874338030815125, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1249731108546257, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11274012178182602, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06492172181606293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.061131756752729416, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07403676211833954, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0683734342455864, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06595255434513092, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0578971691429615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05481356382369995, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.037615157663822174, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0326678529381752, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.031117016449570656, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.030737509950995445, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01878385804593563, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015973709523677826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01582184247672558, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0145341781899333, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014293618500232697, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00982214231044054, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009709054604172707, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009311678819358349, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006254643201828003, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015973709523677826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006254643201828003, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1194264218211174, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1113007441163063, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1077725812792778, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09716936200857162, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05607280135154724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.052655141800642014, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0644778460264206, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.059509098529815674, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05698678642511368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05003459379076958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.047459281980991364, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03276674076914787, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028447067365050316, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02687607705593109, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02649778686463833, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01635933108627796, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013780324719846249, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01361954677850008, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012541248463094234, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01230291835963726, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008527001366019249, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00839050393551588, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008022765628993511, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005350187886506319, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01635933108627796, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005350187886506319, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2753615379333496, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2571546137332916, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2511845827102661, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.22688786685466766, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12987248599529266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.12314938753843307, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1457541137933731, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1342945694923401, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.13190050423145294, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11595278978347778, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10972271114587784, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07417696714401245, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06415988504886627, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.062129437923431396, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.061653006821870804, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.036958422511816025, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.031580351293087006, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.031432073563337326, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.028691036626696587, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02837522141635418, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.019027451053261757, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018540838733315468, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01825333945453167, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011398253031075, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01825333945453167, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011398253031075, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2358560562133789, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20956943929195404, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1987030953168869, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16973859071731567, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11050454527139664, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09925507009029388, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13159990310668945, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12046002596616745, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11350661516189575, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09130758792161942, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08369147032499313, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06786849349737167, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05813834071159363, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05356394872069359, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05244100093841553, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03416222333908081, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028428826481103897, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027977529913187027, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024720964953303337, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023993337526917458, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018863404169678688, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01888791099190712, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017430583015084267, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013630300760269165, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017430583015084267, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013630300760269165, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21713653206825256, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20320682227611542, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1988421529531479, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17963670194149017, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10262925922870636, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09758677333593369, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11427464336156845, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10560665279626846, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10416002571582794, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09178145974874496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08686477690935135, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05828404426574707, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.050567544996738434, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.049207672476768494, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04888409376144409, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0290557648986578, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02538619376718998, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025294020771980286, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023192448541522026, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022986793890595436, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015357966534793377, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015455462038516998, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014909549616277218, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010361189022660255, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015357966534793377, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010361189022660255, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2670253813266754, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24991925060749054, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2446342557668686, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.22130799293518066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1261247843503952, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11996114253997803, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.140333354473114, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1297338455915451, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12801912426948547, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11285930126905441, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10684945434331894, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.07145793735980988, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06198408082127571, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.06034315750002861, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0599493645131588, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03558984771370888, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.03079105354845524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.03067871369421482, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02805696241557598, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.027802934870123863, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01860087178647518, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01821247860789299, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.018053894862532616, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011509308591485023, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01821247860789299, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011509308591485023, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.27888959646224976, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.24596548080444336, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23227940499782562, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.20564503967761993, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12861548364162445, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11493583023548126, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1544196903705597, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14217086136341095, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13393960893154144, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.109463632106781, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10290326923131943, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07880263030529022, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06808628141880035, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0619470439851284, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.060418497771024704, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03933044895529747, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03220362588763237, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03182944282889366, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02837221696972847, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02736961655318737, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.020906733348965645, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.020684950053691864, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01889624260365963, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01375302579253912, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01375302579253912, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01375302579253912, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13687685132026672, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12758387625217438, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12384658306837082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11172854900360107, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06435256451368332, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06054334715008736, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07364391535520554, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06786201149225235, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06538817286491394, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0573972724378109, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05448419973254204, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03745190426707268, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03245560824871063, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03084685280919075, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.030461255460977554, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018702886998653412, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01581679843366146, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01565859466791153, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01439023669809103, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014145344495773315, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00976936612278223, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00959873478859663, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009247087873518467, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00614344235509634, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01581679843366146, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00614344235509634, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12043583393096924, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11228110641241074, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10868073999881744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09813425689935684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05654191970825195, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05312122032046318, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06526216119527817, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06002302095293999, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.057472046464681625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05044740065932274, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04797014221549034, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.033141281455755234, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028673365712165833, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.027113014832139015, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026734797284007072, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016544725745916367, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01390907820314169, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013748332858085632, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012656057253479958, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012414516881108284, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00863861944526434, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008467158302664757, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008141773752868176, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005399708170443773, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016544725745916367, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005399708170443773, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.27611595392227173, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.25797638297080994, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2519143521785736, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2276090383529663, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1304619312286377, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.12369018793106079, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14649103581905365, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13489902019500732, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.13249549269676208, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11643215268850327, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.11019235104322433, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07461835443973541, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06447721272706985, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06245831772685051, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.06196408346295357, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.037186432629823685, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03184857591986656, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03169527277350426, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02893870882689953, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.028627315536141396, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.019243547692894936, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018856145441532135, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018475236371159554, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011831115931272507, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011831115931272507, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011831115931272507, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23798398673534393, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20017185807228088, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18697905540466309, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15203647315502167, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10858545452356339, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09394540637731552, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12953971326351166, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11826954036951065, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11399348080158234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08377733826637268, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07631652802228928, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0666104108095169, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05779750272631645, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0534491129219532, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05238240957260132, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.033673256635665894, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02951619029045105, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029263371601700783, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02505854144692421, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02437395602464676, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.019059015437960625, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.02093609981238842, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0177496075630188, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01656016707420349, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0177496075630188, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01656016707420349, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21900849044322968, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20481140911579132, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20038016140460968, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18127195537090302, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1035563126206398, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09843066334724426, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11529926210641861, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10662497580051422, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10511378198862076, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09260505437850952, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08762358129024506, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05886493995785713, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.051043152809143066, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0496663972735405, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04934035241603851, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.029330644756555557, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02561456896364689, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02552695944905281, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023396451026201248, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.023192614316940308, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015489257872104645, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015594321303069592, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015036793425679207, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010448861867189407, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015489257872104645, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010448861867189407, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2675189673900604, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2503422498703003, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.24499952793121338, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.22171631455421448, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1263800710439682, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1201433315873146, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.14063714444637299, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1299985647201538, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12825752794742584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11305627971887589, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10699894279241562, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.07153869420289993, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0621018223464489, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.060446467250585556, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.06005130335688591, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0356343649327755, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.03079124353826046, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.030682319775223732, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.028038455173373222, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02778726816177368, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01856420375406742, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.018143048509955406, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01801336742937565, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011364057660102844, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.018143048509955406, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011364057660102844, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2754215598106384, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2436847984790802, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23019357025623322, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.20388586819171906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1271776556968689, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11390817165374756, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15298157930374146, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14082713425159454, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13247673213481903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.1087697297334671, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10226301103830338, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07837564498186111, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06772372126579285, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06146404147148132, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.059908609837293625, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03943360596895218, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.032294388860464096, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03188460320234299, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.028656786307692528, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.027650127187371254, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.021404724568128586, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.021194031462073326, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.019363729283213615, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014643937349319458, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014643937349319458, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014643937349319458, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14791321754455566, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13792262971401215, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13408182561397552, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12100648134946823, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06964512914419174, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06564261019229889, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07898395508527756, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07302215695381165, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07074100524187088, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.062057286500930786, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0587143674492836, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04010234773159027, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.034882403910160065, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.033343009650707245, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0329732708632946, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.020017949864268303, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017048178240656853, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.016902262344956398, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.015492659993469715, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015255334787070751, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010406962595880032, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01023692823946476, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009893756359815598, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006463681347668171, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017048178240656853, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006463681347668171, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1283978521823883, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11975046247243881, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11629704385995865, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10485140234231949, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.060462601482868195, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0569416768848896, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06908611208200455, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06364321708679199, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.061435479670763016, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05394682288169861, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05110294744372368, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.035140398889780045, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030414370819926262, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028974413871765137, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028628582134842873, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0175455454736948, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014827563427388668, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014688490889966488, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013484619557857513, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013263053260743618, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009141839109361172, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008940032683312893, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008667238987982273, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005660926457494497, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0175455454736948, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005660926457494497, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2804655134677887, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.26189157366752625, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.25591763854026794, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.23125909268856049, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.13249360024929047, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.12566430866718292, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14841261506080627, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13677310943603516, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.13454879820346832, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11825012415647507, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.11181976646184921, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07554974406957626, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06535577028989792, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06337429583072662, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.06290977448225021, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.037639960646629333, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.032236769795417786, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03209349140524864, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.029272157698869705, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02896801196038723, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.019424259662628174, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01892821304500103, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018684690818190575, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011686768382787704, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011686768382787704, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011686768382787704, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2368578016757965, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20643460750579834, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19487442076206207, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16587421298027039, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10995163023471832, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09765245765447617, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13015961647033691, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11997278779745102, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11381403356790543, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08959746360778809, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08212977647781372, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06687965244054794, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05770564079284668, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05310162901878357, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.051966674625873566, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.033545300364494324, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02780497632920742, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02739262953400612, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02378012239933014, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02300603874027729, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017971960827708244, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018012724816799164, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016482843086123466, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0124219274148345, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017971960827708244, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0124219274148345, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22339247167110443, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20888519287109375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20426414906978607, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1847107857465744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10568392276763916, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10041296482086182, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11770828068256378, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1087883859872818, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10728712379932404, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09443233162164688, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08923806250095367, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06009902432560921, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.052083879709243774, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05066394805908203, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05032441020011902, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0299584548920393, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026091409847140312, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025997037068009377, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02380816452205181, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02359372191131115, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015840088948607445, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01581726036965847, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015374928712844849, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010509205050766468, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015840088948607445, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010509205050766468, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.270298570394516, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2528230845928192, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.24741747975349426, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.22380128502845764, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12777850031852722, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.12144261598587036, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.14216497540473938, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.13144493103027344, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12969155609607697, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11421713978052139, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10802791267633438, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.07237152010202408, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06277468055486679, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.061098612844944, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.06069479137659073, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.036024365574121475, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.031095653772354126, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.03098507598042488, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.028298089280724525, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.028042182326316833, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.018746396526694298, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01828284189105034, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.018187671899795532, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01139577105641365, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01828284189105034, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01139577105641365, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.28325143456459045, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2516883313655853, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23850469291210175, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21127156913280487, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13102129101753235, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11792441457509995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15652361512184143, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14415399730205536, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13613277673721313, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11217164993286133, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10534616559743881, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07968109101057053, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06898017972707748, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06300883740186691, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06152340769767761, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.039837535470724106, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.0325772799551487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.032192476093769073, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.028761452063918114, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.027788374572992325, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.021152377128601074, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.020626407116651535, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.019209008663892746, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013396017253398895, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013396017253398895, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013396017253398895, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1465318351984024, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13647028803825378, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1323985606431961, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11945392936468124, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06901568174362183, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06490693986415863, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07870674133300781, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07264640182256699, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07016748189926147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06146496161818504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05812397599220276, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.040023788809776306, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03476712107658386, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03309919312596321, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0326974093914032, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02000371739268303, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017001768574118614, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.016842005774378777, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01545378752052784, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015197676606476307, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010492113418877125, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010353827849030495, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009936708025634289, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006682493723928928, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017001768574118614, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006682493723928928, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.127353698015213, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11857295036315918, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11490408331155777, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10360988229513168, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05989537388086319, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0562790147960186, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06873258948326111, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06333445757627487, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06090133264660835, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05334002524614334, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0505555234849453, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03493092581629753, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03027164936065674, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028707927092909813, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028334740549325943, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017440829426050186, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014705953188240528, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014550360850989819, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013358800671994686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013117635622620583, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009093858301639557, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008914324454963207, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008574122563004494, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005664566066116095, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017440829426050186, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005664566066116095, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2772153317928314, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.25855645537376404, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2524969279766083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2279849499464035, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.13085411489009857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1239146739244461, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14693401753902435, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13532426953315735, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.13288258016109467, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11663047224283218, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.11026068031787872, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07473142445087433, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0646614357829094, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06259240955114365, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0620989091694355, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.037227023392915726, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03180019184947014, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.031634848564863205, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.028829092159867287, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02850373648107052, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.019165972247719765, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01864047534763813, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0183886606246233, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011417888104915619, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011417888104915619, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011417888104915619, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22587953507900238, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18389742076396942, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16657593846321106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13539093732833862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09938264638185501, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08260909467935562, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1290172040462494, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1151113510131836, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10553935915231705, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07686498761177063, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0720384493470192, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0651879832148552, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05637877807021141, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04934244975447655, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04751991480588913, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03346201032400131, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027888739481568336, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02728250063955784, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023961246013641357, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022827325388789177, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.019509827718138695, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.020841823890805244, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01745186559855938, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016663646325469017, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01745186559855938, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016663646325469017, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22847846150398254, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21352915465831757, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20879653096199036, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18876159191131592, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10819116979837418, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10273531824350357, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12068045139312744, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11140423268079758, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10982313007116318, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09658899158239365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09134765714406967, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06164534017443657, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05339707061648369, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0519338957965374, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.051592983305454254, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03082621656358242, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02684890106320381, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02674848400056362, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02450837753713131, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02428618259727955, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016512271016836166, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016424093395471573, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01603730395436287, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011100273579359055, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016512271016836166, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011100273579359055, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.27385640144348145, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2561438977718353, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.25059783458709717, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.22660472989082336, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12951771914958954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.12306167930364609, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.14443211257457733, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.13330669701099396, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1315111666917801, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11571768671274185, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10949748754501343, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.07354029268026352, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06376369297504425, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.06203608587384224, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.06161707639694214, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03672715649008751, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.03171234205365181, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.03159588202834129, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.028866594657301903, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02859918214380741, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.019375447183847427, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.018831824883818626, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.018807945773005486, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012015034444630146, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012015034444630146, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012015034444630146, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2841194272041321, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.253138929605484, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23963582515716553, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21253333985805511, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13165871798992157, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11851741373538971, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15792149305343628, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14557188749313354, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13677804172039032, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.113185815513134, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10648214817047119, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.08064842224121094, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06991155445575714, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06352197378873825, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0619405172765255, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.040358349680900574, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03321804478764534, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03278527408838272, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.029538780450820923, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.028519053012132645, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.021425975486636162, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.02157222107052803, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.019307788461446762, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014648474752902985, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014648474752902985, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014648474752902985, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.15139304101467133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1411730945110321, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13732635974884033, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12387184053659439, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07135356962680817, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06729055941104889, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08086186647415161, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07472848147153854, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0725138708949089, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06358461827039719, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06011832505464554, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.041056178510189056, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.035730984061956406, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03418191149830818, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03381849825382233, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02048385702073574, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01743936724960804, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.017297666519880295, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.015828806906938553, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01559318508952856, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010617372579872608, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010421366430819035, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010101588442921638, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006524262018501759, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01743936724960804, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006524262018501759, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1326093226671219, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12361353635787964, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12010510265827179, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10831402987241745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06247018650174141, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.058864396065473557, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07116492092609406, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06566955894231796, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06348202377557755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.055667221546173096, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.052749574184417725, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03618067130446434, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.031374167650938034, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.029931113123893738, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029581356793642044, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01806042529642582, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015302124433219433, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015162715688347816, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013897440396249294, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01367623545229435, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009380853734910488, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009191688150167465, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008907335810363293, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005792396143078804, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01806042529642582, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005792396143078804, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.28492042422294617, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2660490870475769, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2599813640117645, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.23483864963054657, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.13472045958042145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.12772653996944427, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.15087834000587463, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13908638060092926, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.13680018484592438, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.12011895328760147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.11359069496393204, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07676718384027481, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06646443903446198, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06444375962018967, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.06396766752004623, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03826391324400902, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03272312879562378, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.032576289027929306, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.029680345207452774, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02936689369380474, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.019686764106154442, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019139854237437248, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01892460696399212, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011686353012919426, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011686353012919426, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011686353012919426, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24832013249397278, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19922247529029846, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17951618134975433, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16157810389995575, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1113489642739296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08901514112949371, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1396530717611313, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1281103640794754, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11968189477920532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08781003952026367, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08505243062973022, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07208891957998276, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.061941441148519516, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.054248418658971786, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.052298277616500854, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03637615591287613, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029097411781549454, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028536668047308922, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024628406390547752, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02331765927374363, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.020267164334654808, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.020192107185721397, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01795796863734722, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014611509628593922, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01795796863734722, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014611509628593922, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23254187405109406, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2172779142856598, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21251662075519562, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19197405874729156, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11012780666351318, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10456939041614532, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12286162376403809, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11341118812561035, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11181043833494186, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.098297119140625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09298773854970932, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06280940771102905, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.054331690073013306, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.052823424339294434, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.052473414689302444, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03134045749902725, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027271969243884087, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027172544971108437, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024878013879060745, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024656033143401146, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01667662337422371, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01662895642220974, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016185840591788292, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011171565391123295, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01667662337422371, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011171565391123295, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2769087851047516, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2588314712047577, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2532113194465637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.22893987596035004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1310255229473114, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.12444739788770676, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1458842009305954, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1348852962255478, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.13302171230316162, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1169685497879982, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.11059065908193588, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0743030309677124, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06445273011922836, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.06268824636936188, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.06226649507880211, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03699929639697075, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.03192916885018349, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.03181459382176399, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.029017804190516472, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.028748581185936928, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01928320899605751, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.018812332302331924, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.018699532374739647, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011759204789996147, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011759204789996147, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011759204789996147, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2841893136501312, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2533188760280609, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23951847851276398, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21254542469978333, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13173964619636536, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11848075687885284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.15967868268489838, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14625036716461182, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1368597149848938, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11342024058103561, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10688048601150513, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.08166347444057465, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.07032828778028488, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06367181241512299, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06201274320483208, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.04120881110429764, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03344825282692909, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03298449143767357, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.029820308089256287, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02877230755984783, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02261945977807045, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.021966654807329178, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.020498184487223625, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015173803083598614, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015173803083598614, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015173803083598614, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.156843900680542, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.14619652926921844, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.14216139912605286, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1282302737236023, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07397660613059998, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06974563002586365, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08397670835256577, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07747844606637955, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07515987008810043, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06588300317525864, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06232047826051712, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0427086316049099, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.037053611129522324, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03545320779085159, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.035072918981313705, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.021329468116164207, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.018167858943343163, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0180196575820446, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.016504570841789246, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.016258588060736656, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.011121360585093498, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010955151170492172, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010571843944489956, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.007000466343015432, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.018167858943343163, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.007000466343015432, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13667921721935272, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1274646371603012, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12378615140914917, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11165008693933487, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06448445469141006, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06072089821100235, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07354717701673508, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06775931268930435, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06552468985319138, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05743985250592232, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05442861467599869, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03740779310464859, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03240058198571205, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030902568250894547, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.030541090294718742, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018672669306397438, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015808941796422005, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015665117651224136, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014362980611622334, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014132320880889893, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009726990014314651, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009519965387880802, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009230880998075008, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006026870105415583, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015808941796422005, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006026870105415583, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.28655537962913513, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.26747387647628784, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2613544464111328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2360529899597168, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.13550904393196106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.12847654521465302, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.15175478160381317, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1398938149213791, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.13765142858028412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1207590326666832, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.11416064947843552, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07724356651306152, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06683197617530823, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06481029093265533, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0643354058265686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.038481470197439194, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03295919671654701, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.032815661281347275, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02988531067967415, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02958129160106182, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.019843369722366333, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01932489685714245, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.019098175689578056, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01190162356942892, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01190162356942892, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01190162356942892, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2576063573360443, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21399705111980438, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19692333042621613, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17374970018863678, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11899232119321823, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.099210724234581, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14371584355831146, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13213026523590088, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1240801215171814, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09433305263519287, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08731254935264587, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07378149777650833, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06355222314596176, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05753488093614578, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05602097138762474, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03694688901305199, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03026311658322811, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029741942882537842, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025442136451601982, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024426283314824104, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01992899365723133, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019962189719080925, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018024025484919548, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013871826231479645, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018024025484919548, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013871826231479645, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2308616191148758, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21572068333625793, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21090440452098846, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19061850011348724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10942482948303223, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10387400537729263, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1220313087105751, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11272680014371872, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11113568395376205, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.097612164914608, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09228246659040451, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06239236518740654, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05400949344038963, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05250793695449829, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05214904621243477, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031111344695091248, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027102002874016762, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02700035646557808, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024708230048418045, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024480542168021202, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016528237611055374, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01652362197637558, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016034496948122978, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011089097708463669, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016528237611055374, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011089097708463669, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2780563235282898, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2598702609539032, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2542364299297333, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.22968585789203644, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.13165554404258728, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.12501312792301178, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.14678429067134857, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1355665773153305, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.13368436694145203, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11748260259628296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.11109703779220581, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0749126598238945, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06483137607574463, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.06305290013551712, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.06262623518705368, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03730279952287674, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.03222118690609932, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.03210210055112839, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.029301006346940994, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.029026273638010025, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.0195747222751379, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01914464309811592, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01899263635277748, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01220702100545168, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01220702100545168, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01220702100545168, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.29055362939834595, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.25857964158058167, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.24467860162258148, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21720676124095917, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13459114730358124, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.12104532867670059, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.16142259538173676, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14897246658802032, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13987188041210175, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11572464555501938, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10886106640100479, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.08253996819257736, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.07149013131856918, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06491561233997345, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06328047066926956, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.041309796273708344, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03386290743947029, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03340994566679001, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.030091751366853714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.029029980301856995, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.022088872268795967, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.021903103217482567, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01998145505785942, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014729516580700874, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014729516580700874, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014729516580700874, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1525697112083435, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1421010047197342, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13805340230464935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12443067878484726, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07194895297288895, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06778594106435776, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08162300288677216, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07553119212388992, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07313089817762375, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06401016563177109, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.060462284833192825, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04147106036543846, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.036081429570913315, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.034474119544029236, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.034090932458639145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.020702971145510674, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01763724908232689, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01748601719737053, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.016000894829630852, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01575375907123089, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010778279043734074, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010604895651340485, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010245962999761105, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006714481394737959, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01763724908232689, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006714481394737959, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13271276652812958, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12359093874692917, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11996622383594513, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10812254250049591, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06255646795034409, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05885158106684685, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07130344957113266, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06579123437404633, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06357906758785248, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.055670883506536484, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05265501141548157, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03626694157719612, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03147369995713234, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.029971439391374588, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029616987332701683, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018108127638697624, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015350878238677979, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015208679251372814, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013936501927673817, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013707349076867104, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009433463215827942, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009275704622268677, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008944819681346416, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005898427218198776, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018108127638697624, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005898427218198776, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2878277003765106, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.26854148507118225, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2623685300350189, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.23660989105701447, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1361815184354782, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1290142983198166, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.15233145654201508, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.14059092104434967, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.13831530511379242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.12120950222015381, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.11447526514530182, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0775253102183342, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06718961149454117, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06515636295080185, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.06467177718877792, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0386231504380703, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03309858590364456, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03294908627867699, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.029975611716508865, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.029666472226381302, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01987568847835064, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019367514178156853, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.019122546538710594, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011860689148306847, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011860689148306847, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011860689148306847, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23393838107585907, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19231759011745453, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17527949810028076, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15335163474082947, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10715365409851074, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08750738948583603, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1323801726102829, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12111978977918625, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11278479546308517, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08404064923524857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07838161289691925, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06812455505132675, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05833776667714119, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05188295245170593, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05026712641119957, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.034074053168296814, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02732078544795513, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02675853669643402, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022809937596321106, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021695254370570183, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018217461183667183, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018227454274892807, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016058262437582016, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012576241977512836, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018217461183667183, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012576241977512836, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2304510623216629, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21519766747951508, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21037903428077698, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18998436629772186, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10924321413040161, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10365338623523712, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12176220864057541, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11257477104663849, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1109502911567688, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09739074856042862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09195583313703537, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06220464035868645, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.053942907601594925, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05242308974266052, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05206499248743057, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031047901138663292, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027057364583015442, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02695782482624054, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02464950457215309, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02442055381834507, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016481438651680946, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016493013128638268, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015985647216439247, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011064994148910046, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016481438651680946, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011064994148910046, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2800233066082001, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.26152336597442627, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2557917833328247, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.23109500110149384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.13258332014083862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.12583965063095093, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.14767977595329285, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.13651049137115479, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.13462498784065247, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1182047501206398, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.11166862398386002, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.07525130361318588, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06525351107120514, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.06344735622406006, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.06301780790090561, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03746458515524864, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.032330188900232315, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.032208047807216644, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02935817278921604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.029079480096697807, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.019525157287716866, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01907533034682274, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01892525516450405, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011946002952754498, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011946002952754498, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011946002952754498, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.29496651887893677, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2629755437374115, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.24889540672302246, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.22091181576251984, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13678939640522003, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.12322627753019333, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.16564524173736572, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.151490718126297, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.14219266176223755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11776381731033325, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.1108928918838501, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.08422394841909409, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.07275941967964172, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06609919667243958, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0644388422369957, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.04236902669072151, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03465219959616661, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.034194935113191605, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.030864229425787926, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.029791152104735374, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02300247922539711, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.022614840418100357, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.020839016884565353, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015481126494705677, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015481126494705677, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015481126494705677, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1564459204673767, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.14557315409183502, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.14140912890434265, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1273493468761444, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07375230640172958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0694335326552391, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08379711955785751, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07745295017957687, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07500196248292923, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06554751843214035, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.061866868287324905, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04259200766682625, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03700415417551994, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03534548357129097, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03493921831250191, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.021246351301670074, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.018077151849865913, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.017923228442668915, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01638188026845455, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.016125256195664406, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01103679183870554, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010877413675189018, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010472457855939865, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00687938928604126, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.018077151849865913, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00687938928604126, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1340854912996292, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1247536689043045, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12100616842508316, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10900521278381348, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06317806243896484, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05940745398402214, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0722646415233612, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06661565601825714, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06425357609987259, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.056181758642196655, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05315038561820984, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03677346929907799, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.031848397105932236, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030289217829704285, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029916614294052124, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018346672877669334, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015510928817093372, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015361583791673183, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0140690254047513, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013830028474330902, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009561993181705475, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009375364519655704, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009043185040354729, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005948501639068127, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015510928817093372, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005948501639068127, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2940566837787628, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.27401870489120483, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2676863372325897, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2414063960313797, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.13910281658172607, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.13173167407512665, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.15581946074962616, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.14360009133815765, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.14133034646511078, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.12372628599405289, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.11680614203214645, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07930035889148712, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06864117085933685, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.066545270383358, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.06603440642356873, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03951924294233322, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.033826686441898346, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03367660194635391, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.030617525801062584, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.030289804562926292, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.020380396395921707, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019822528585791588, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01959063671529293, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012177086435258389, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012177086435258389, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012177086435258389, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24056605994701385, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1959386169910431, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1793898046016693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15089410543441772, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10807043313980103, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09119465947151184, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.133085235953331, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1221187636256218, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11584719270467758, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08324680477380753, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07767539471387863, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06829334795475006, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05863634869456291, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.052193086594343185, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05056750774383545, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.034176480025053024, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02730206586420536, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026878654956817627, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022396493703126907, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02125055156648159, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018291747197508812, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01797066442668438, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016241371631622314, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012106161564588547, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018291747197508812, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012106161564588547, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2282077819108963, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21299338340759277, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20820902287960052, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18792924284934998, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10821737349033356, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10265757888555527, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12070822715759277, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11155053228139877, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10993107408285141, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09640032798051834, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09102629125118256, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.061693668365478516, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05345248058438301, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.051943760365247726, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.051580894738435745, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03077024035155773, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026824122294783592, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026722485199570656, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024426070973277092, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02419767901301384, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016310209408402443, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016376441344618797, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01580810733139515, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011014431715011597, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016310209408402443, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011014431715011597, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.27821001410484314, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.25977182388305664, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2540493607521057, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2294326275587082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.13182714581489563, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.12509985268115997, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.146870419383049, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.13578665256500244, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1338745653629303, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11749004572629929, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.110944963991642, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.07491882145404816, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06493010371923447, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0631282851099968, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.06269588321447372, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03730064630508423, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.03221849724650383, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.032094556838274, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02925446629524231, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.028977716341614723, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.019498741254210472, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01908031664788723, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.018894322216510773, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012059813365340233, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012059813365340233, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012059813365340233, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.32.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2917640209197998, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.26008540391921997, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.24623528122901917, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21872785687446594, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13525430858135223, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.12188120186328888, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.16302703320980072, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14984123408794403, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.14069156348705292, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11662155389785767, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10981152206659317, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.08347813040018082, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0719342976808548, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06525499373674393, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0636046975851059, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.042077209800481796, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.034087080508470535, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03362513706088066, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.03036860004067421, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.029289593920111656, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.023026302456855774, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.022109655663371086, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.020883284509181976, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014915289357304573, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014915289357304573, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014915289357304573, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.15145382285118103, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.14085593819618225, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13668732345104218, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12309142202138901, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07141401618719101, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06715834885835648, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0813666582107544, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07513689249753952, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07261484861373901, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06343146413564682, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05992668494582176, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04138337820768356, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0359504334628582, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.034244269132614136, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03383735194802284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.020655492320656776, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017532266676425934, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.017369212582707405, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01588268391788006, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015617027878761292, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010750983841717243, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01058219838887453, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010184015147387981, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0067122625187039375, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017532266676425934, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0067122625187039375, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1319476217031479, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12270935624837875, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11904584616422653, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10718361288309097, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06221487745642662, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0584540069103241, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07097920030355453, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06554427742958069, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06322656571865082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.055274367332458496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.052233923226594925, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.036130595952272415, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.031354449689388275, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.029823003336787224, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0294503103941679, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01803404651582241, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01527048833668232, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015122092328965664, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013842475600540638, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013610588386654854, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00938976276665926, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009230000898241997, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008886902593076229, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005864273756742477, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01803404651582241, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005864273756742477, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.28844499588012695, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.26890829205513, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.26254674792289734, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.23676645755767822, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.13655975461006165, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1292717307806015, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.15293949842453003, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.14116457104682922, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.13875006139278412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.12138038128614426, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.11455132812261581, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07781906425952911, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06745866686105728, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06533630937337875, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.06482449918985367, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03877784684300423, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03318381682038307, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03303113952279091, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.030025487765669823, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02970617078244686, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.019954059273004532, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01941945217549801, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01918218657374382, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011864309199154377, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011864309199154377, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011864309199154377, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25054869055747986, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2098110169172287, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19441543519496918, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16802014410495758, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11477626860141754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09830356389284134, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14028489589691162, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12696868181228638, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12016139179468155, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09036912769079208, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08556460589170456, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0716712549328804, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06147269159555435, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.055887460708618164, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.054508209228515625, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.036238595843315125, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03010498359799385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029665997251868248, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02551312744617462, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024599509313702583, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.020373491570353508, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.020637396723031998, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0186958946287632, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015361825004220009, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015361825004220009, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015361825004220009, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2257736772298813, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2106325924396515, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20586048066616058, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18581561744213104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10709881037473679, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10152100771665573, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11950647830963135, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11040037125349045, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10879337042570114, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09532289952039719, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0900263786315918, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.061050523072481155, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05292995646595955, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05142897367477417, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.051069971174001694, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030446652323007584, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026589972898364067, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026486679911613464, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02421162836253643, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.023982642218470573, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01614086888730526, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0162788275629282, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015648655593395233, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010996507480740547, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01614086888730526, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010996507480740547, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2775181233882904, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2590616047382355, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.25331011414527893, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.22865763306617737, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1315743625164032, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.12476205825805664, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.14676551520824432, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.13554470241069794, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.13363924622535706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11717317253351212, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.11064344644546509, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.07492976635694504, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06485579907894135, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.06303762644529343, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.06260296702384949, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.037345387041568756, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.032266005873680115, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.032151926308870316, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0293045025318861, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02902880124747753, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.019659100100398064, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.019239215180277824, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.019061513245105743, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012347767129540443, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012347767129540443, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012347767129540443, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.33.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.29216206073760986, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2598160207271576, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.24507851898670197, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21772737801074982, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1353105753660202, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.12131374329328537, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.16471901535987854, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.15115010738372803, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.14097869396209717, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11656391620635986, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10989578813314438, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.08406949788331985, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.07261190563440323, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0653381422162056, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.06351112574338913, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.042451199144124985, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03417850658297539, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03364131972193718, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.03044561855494976, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02925386093556881, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02322724647819996, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.022340944036841393, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.02092146873474121, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015075191855430603, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015075191855430603, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015075191855430603, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1545005738735199, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.14363470673561096, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1393369436264038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12533992528915405, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07288363575935364, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06849192082881927, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08290024101734161, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07674696296453476, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0741182267665863, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06464137881994247, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06097154691815376, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04217005893588066, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03668718412518501, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03494466096162796, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03452477976679802, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02104119397699833, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017899315804243088, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01773088611662388, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01620021089911461, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015929212793707848, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010967089794576168, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01081984955817461, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01039590872824192, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006889814976602793, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017899315804243088, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006889814976602793, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1308341920375824, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12159039080142975, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11779081076383591, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10591145604848862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06166863068938255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0578465573489666, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07076679170131683, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.065252885222435, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06273150444030762, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.054710716009140015, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05174596980214119, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03598063439130783, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03123036026954651, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02959003485739231, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029199624434113503, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01797371543943882, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015183786861598492, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01501777209341526, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013761383481323719, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013505774550139904, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009390469640493393, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009249605238437653, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00885507557541132, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0059206378646194935, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01797371543943882, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0059206378646194935, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2923300564289093, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.27233263850212097, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.26578614115715027, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.23940785229206085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1383197009563446, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1308681219816208, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1550065577030182, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.14300356805324554, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1405496746301651, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.12283758074045181, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.11587075144052505, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07888972014188766, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06835129857063293, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06622314453125, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.06570982187986374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.039293598383665085, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03363915905356407, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0334785059094429, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.03041735291481018, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.03008156083524227, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.02023952826857567, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019710613414645195, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.019450347870588303, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012072457931935787, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012072457931935787, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012072457931935787, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2177097350358963, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17933690547943115, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16328030824661255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13516516983509064, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09710109978914261, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08172868937253952, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12283187359571457, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11313170194625854, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10463686287403107, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07559076696634293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07123202830553055, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06290804594755173, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05427628383040428, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04691765457391739, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.045028891414403915, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03140830621123314, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024555964395403862, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02397148869931698, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020321877673268318, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01899692974984646, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016551679000258446, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016463428735733032, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014132060110569, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010933003388345242, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016551679000258446, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010933003388345242, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22461961209774017, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.20952379703521729, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20474235713481903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18463324010372162, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10654498636722565, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10098083317279816, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11885227262973785, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10987307876348495, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10824538767337799, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.094834104180336, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08956027030944824, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06077219918370247, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.052700694650411606, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0511794351041317, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05081774666905403, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030292972922325134, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026486603543162346, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.0263806339353323, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024116000160574913, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.023885957896709442, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016053715720772743, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0162586010992527, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015555727295577526, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011026489548385143, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016053715720772743, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011026489548385143, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2702051103115082, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.25203415751457214, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2464202642440796, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.22245346009731293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1280616819858551, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.12142026424407959, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.14290274679660797, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.13199199736118317, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.13009832799434662, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11404035240411758, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.1076362356543541, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.07284700125455856, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06313534826040268, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.06135289743542671, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.060929782688617706, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.036311253905296326, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.03135339170694351, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.031231407076120377, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.028450429439544678, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.028174279257655144, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.019010452553629875, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01862310618162155, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.018415283411741257, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011838565580546856, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011838565580546856, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011838565580546856, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.34.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2878436744213104, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2546674311161041, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23970560729503632, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21301861107349396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13270922005176544, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11869920790195465, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.16175077855587006, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1489480584859848, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13888007402420044, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11427026242017746, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10771872103214264, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.08265417069196701, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.07149636745452881, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06412951648235321, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0622803159058094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0413687564432621, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03362056240439415, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.033139120787382126, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.029956167563796043, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.028780648484826088, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.022129422053694725, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.022106995806097984, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.019735243171453476, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015074172988533974, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015074172988533974, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015074172988533974, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14977318048477173, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1392018049955368, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13504788279533386, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12151970714330673, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0706491619348526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06634851545095444, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08043021708726883, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07440801709890366, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07183776795864105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06266000121831894, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0590885765850544, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.040904268622398376, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03557860106229782, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.033879898488521576, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.033475734293460846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.020411554723978043, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017364216968417168, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.017198393121361732, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01571248285472393, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015446853823959827, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010646583512425423, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010496199131011963, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01008959673345089, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006685506086796522, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.017364216968417168, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006685506086796522, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12671971321105957, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11778106540441513, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11412190645933151, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10264337062835693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05973205715417862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05603024363517761, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06839533150196075, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06314083188772202, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.060729023069143295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05298340320587158, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.050058770924806595, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.034770939499139786, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030172904953360558, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028641728684306145, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028271496295928955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017369234934449196, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014700922183692455, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014548206701874733, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013317703269422054, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013079168274998665, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009073887020349503, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008942240849137306, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00856794323772192, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005730126518756151, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017369234934449196, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005730126518756151, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.290689080953598, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2708531320095062, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2644943594932556, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.23817336559295654, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1376662701368332, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.13024920225143433, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.15416240692138672, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.142238050699234, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.13988147675991058, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.12227588146924973, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.11531670391559601, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07846786826848984, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06800170987844467, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0658998042345047, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.06538360565900803, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03911231830716133, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03347684070467949, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03332428261637688, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.030273467302322388, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.029939968138933182, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.020147165283560753, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01961350440979004, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01937471330165863, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01202314905822277, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01202314905822277, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01202314905822277, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23196350038051605, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19469031691551208, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1823783963918686, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15076839923858643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10646101087331772, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09253235906362534, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12533752620220184, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11510968208312988, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11160363256931305, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08228575438261032, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0740213394165039, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06428488343954086, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05529217794537544, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05135173350572586, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05039096623659134, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03213139623403549, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026867246255278587, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02664393186569214, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022125042974948883, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02146059088408947, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017200950533151627, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017293870449066162, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015936659649014473, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011957473121583462, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017200950533151627, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011957473121583462, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22182133793830872, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2068486511707306, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2021363526582718, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18229897320270538, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10526793450117111, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09973610937595367, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.117331862449646, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10856334120035172, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10695274919271469, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0936209112405777, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08833398669958115, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.059858568012714386, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05201662331819534, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05050947889685631, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0501595214009285, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.029837405309081078, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0260572861880064, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025959623977541924, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023696579039096832, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.023469706997275352, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015683677047491074, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015863865613937378, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015189195051789284, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010608579963445663, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015683677047491074, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010608579963445663, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.26293691992759705, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.245365172624588, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23987339437007904, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.216441348195076, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.124649278819561, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11818378418684006, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1389068067073822, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12846283614635468, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.126643106341362, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11093681305646896, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10464978218078613, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.07077228277921677, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.06140836700797081, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05967506766319275, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05926487594842911, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.035227831453084946, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.030425777658820152, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.030312268063426018, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.027577558532357216, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0273114200681448, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01831645518541336, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017964079976081848, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017734160646796227, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011268829926848412, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01831645518541336, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011268829926848412, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.35.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.28911253809928894, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2543574571609497, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.23913466930389404, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.21254193782806396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.13291442394256592, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11842530965805054, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.16217735409736633, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.14922797679901123, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.13954250514507294, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.11407189816236496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10764420032501221, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.08284081518650055, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.07157876342535019, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06414481997489929, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.062288183718919754, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.041423019021749496, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.033491019159555435, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03303084149956703, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.029732590541243553, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02852659858763218, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.022042620927095413, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.021862516179680824, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01959921605885029, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014641180634498596, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014641180634498596, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014641180634498596, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14502215385437012, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13462312519550323, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13031701743602753, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11703749746084213, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06830275058746338, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06401623040437698, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07812696695327759, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0723072811961174, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06950438022613525, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0604698546230793, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.057036977261304855, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03975580260157585, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03459088131785393, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03277181461453438, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0323336236178875, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.019853053614497185, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016813203692436218, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.016632534563541412, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.015204786323010921, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014923459850251675, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010368834249675274, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.010239137336611748, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009779090993106365, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006556754466146231, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.016813203692436218, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006556754466146231, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1193610429763794, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11073167622089386, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10703691095113754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09609698504209518, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05612177774310112, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05248001962900162, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06459515541791916, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05971020460128784, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05708995461463928, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04970967397093773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04690272733569145, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03282421454787254, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028554225340485573, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026909319683909416, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026516959071159363, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016404664143919945, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013833819888532162, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013666241429746151, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012519187293946743, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012265876866877079, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008584066294133663, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008472825400531292, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008060180582106113, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005450401455163956, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016404664143919945, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005450401455163956, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2844407856464386, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2649337947368622, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.25867167115211487, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.23291826248168945, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.13465942442417145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.12743917107582092, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.15109580755233765, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13927367329597473, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.136863112449646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11955688148736954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.11277022212743759, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07696802169084549, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06660164892673492, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06448718160390854, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0639825239777565, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.038374315947294235, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.032834019511938095, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03267843276262283, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.029686883091926575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.029358038678765297, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01979244500398636, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019337009638547897, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.019010936841368675, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011993643827736378, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011993643827736378, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011993643827736378, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2234121859073639, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18515188992023468, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17129968106746674, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14111913740634918, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10192690044641495, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0871209129691124, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12374663352966309, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11314838379621506, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10784157365560532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07813487201929092, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07201173156499863, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06378696113824844, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05458274111151695, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.049468763172626495, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.048192426562309265, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03212713077664375, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026358190923929214, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02600475214421749, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021814294159412384, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020946413278579712, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01768091320991516, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01777583546936512, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016077397391200066, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012845898978412151, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01768091320991516, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012845898978412151, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2127109318971634, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19831649959087372, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1937551349401474, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17465008795261383, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10089650005102158, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09556537121534348, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1125289723277092, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.104090116918087, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10250220447778702, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08971782773733139, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08466065675020218, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.057421524077653885, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04989093542098999, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0484289787709713, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.048083171248435974, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02863219380378723, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.025021908804774284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02492511086165905, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022757647559046745, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022535907104611397, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015072310343384743, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015300563536584377, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014586560428142548, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010295706801116467, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015072310343384743, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010295706801116467, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24869146943092346, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23200519382953644, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2267356961965561, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20451365411281586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11792921274900436, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11176546663045883, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13149048388004303, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12159258127212524, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11981284618377686, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10490933805704117, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09895091503858566, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06706936657428741, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.058140672743320465, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05648282170295715, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.056083954870700836, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033393122255802155, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028850853443145752, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028737280517816544, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026159385219216347, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025903603062033653, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.0174267441034317, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017126591876149178, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01686863787472248, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010868179611861706, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.0174267441034317, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010868179611861706, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.36.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.276783287525177, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.24246063828468323, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22776861488819122, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.20235803723335266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12734703719615936, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.11308874934911728, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1558525413274765, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1425676792860031, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.133745476603508, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10870122909545898, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.10260172188282013, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07964377850294113, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06857366114854813, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.06169236823916435, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.059960346668958664, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.039988819509744644, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.032598864287137985, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03220940753817558, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.028944585472345352, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.027840498834848404, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.021711470559239388, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.021721046417951584, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.019465016201138496, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015220488421618938, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015220488421618938, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015220488421618938, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13538050651550293, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12553322315216064, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1214010939002037, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10890804976224899, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06377009302377701, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05965210869908333, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07334095984697342, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06767460703849792, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06488322466611862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0563991479575634, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05325424298644066, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03733903169631958, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03240694850683212, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0306387972086668, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.030211633071303368, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01868659071624279, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015806671231985092, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01563258282840252, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014301921240985394, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01403123140335083, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009843721054494381, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009762669913470745, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009272840805351734, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006420082878321409, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015806671231985092, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006420082878321409, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11515506356954575, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10684863477945328, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10309150069952011, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09253058582544327, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05419173836708069, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.050609029829502106, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06282839924097061, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0579395554959774, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05515408515930176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04798290878534317, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.045318327844142914, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03194023668766022, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027737190946936607, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02603357471525669, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025626827031373978, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01598444953560829, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013432065956294537, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01325423177331686, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01216964703053236, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011905810795724392, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00842087622731924, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00832363124936819, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007867707870900631, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005451907869428396, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01598444953560829, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005451907869428396, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.27087411284446716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.25192996859550476, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.24572241306304932, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.22091956436634064, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12815682590007782, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.12103115767240524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14405332505702972, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.132786363363266, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.13023488223552704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11354519426822662, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10710199922323227, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07340999692678452, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06349950283765793, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06137005612254143, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.06086530163884163, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03658442199230194, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.031287889927625656, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.031124239787459373, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.028253495693206787, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02792082168161869, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018917793408036232, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018503807485103607, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018150800839066505, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011541132815182209, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018150800839066505, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011541132815182209, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.15133251249790192, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12423050403594971, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11484634876251221, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09883620589971542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06835411489009857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05764935538172722, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08297888934612274, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07572252303361893, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07215119898319244, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05313165858387947, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05042035132646561, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.042697399854660034, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03731889650225639, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03409247845411301, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.033291544765233994, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02171645313501358, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.019524378702044487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.019323909655213356, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.016931364312767982, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01645674556493759, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01258687861263752, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014622245915234089, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01164006907492876, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012133394367992878, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.016931364312767982, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01164006907492876, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20155608654022217, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18792955577373505, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1836071014404297, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16553984582424164, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09562455862760544, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09059426188468933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10679175704717636, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09869641065597534, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09719166904687881, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08504191040992737, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08027436584234238, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.054631270468235016, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04737852141261101, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.045968420803546906, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04563439264893532, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027280403301119804, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023858526721596718, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02375977486371994, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02172449789941311, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021511612460017204, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01456440705806017, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014745118096470833, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014099128544330597, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010111588053405285, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01456440705806017, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010111588053405285, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22923307120800018, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21380484104156494, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20892982184886932, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1883867084980011, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10873309522867203, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10299991816282272, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12140243500471115, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11214134842157364, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11048444360494614, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09667506068944931, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09119582176208496, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0620616152882576, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05369515344500542, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05213743448257446, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05176793411374092, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030929988250136375, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026773542165756226, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02666783705353737, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024307291954755783, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024071669206023216, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016323264688253403, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016111183911561966, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015807705000042915, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010521786287426949, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016323264688253403, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010521786287426949, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.37.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2646007835865021, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23018930852413177, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21617138385772705, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19208969175815582, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1217043548822403, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10759235918521881, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14850100874900818, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.13568440079689026, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12789373099803925, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10325952619314194, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09752631187438965, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07623351365327835, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06550904363393784, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05916787311434746, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05758683755993843, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03846433758735657, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.03167124092578888, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.03135190159082413, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02813599817454815, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.027149368077516556, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.02131265215575695, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.021542586386203766, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.019321495667099953, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015731455758213997, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015731455758213997, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.015731455758213997, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13332942128181458, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12392713129520416, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11987769603729248, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10761886090040207, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06283455342054367, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.058850377798080444, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07248315960168839, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06675489246845245, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0639098659157753, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05568646267056465, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.052709754556417465, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03688035532832146, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.031967997550964355, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030198687687516212, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029776988551020622, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018481431528925896, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015579297207295895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015396641567349434, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014123315922915936, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013849126175045967, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009758872911334038, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009618923999369144, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009200336411595345, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006305897142738104, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015579297207295895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.006305897142738104, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11136036366224289, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10350608080625534, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09986811131238937, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0896017849445343, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05240553617477417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.048947133123874664, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.060793764889240265, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.056103404611349106, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0533025749027729, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04645953327417374, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04390126094222069, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030906811356544495, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02685956098139286, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02518412284553051, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02477426640689373, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015478943474590778, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0130029721185565, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012822638265788555, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011792325414717197, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011529851704835892, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008181994780898094, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00807338859885931, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007647525519132614, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005294455215334892, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015478943474590778, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005294455215334892, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.27480122447013855, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2559891641139984, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2499270737171173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.22478079795837402, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.13017119467258453, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.12317128479480743, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.14596395194530487, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.13451485335826874, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.13227570056915283, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11547849327325821, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10887175798416138, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07436956465244293, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.064337819814682, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.06232515722513199, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.061845581978559494, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03706275299191475, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.031773727387189865, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.031628843396902084, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02872348204255104, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.028408298268914223, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.019151045009493828, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018756389617919922, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018405642360448837, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011719939298927784, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011719939298927784, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011719939298927784, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1608734130859375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.131771519780159, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12268783152103424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09891495853662491, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07320807874202728, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06317929923534393, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08643357455730438, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07881836593151093, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07742387056350708, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.053757112473249435, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04945293813943863, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04471307620406151, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.038536474108695984, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03605874627828598, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03546134755015373, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02275674231350422, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.019974568858742714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.019884711131453514, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01650526188313961, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.016106022521853447, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013140409253537655, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014155386947095394, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.012428256683051586, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011297832243144512, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01650526188313961, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011297832243144512, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16735824942588806, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15593211352825165, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1523268222808838, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.13738790154457092, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07971953600645065, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07553108036518097, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08918249607086182, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08228521794080734, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08101524412631989, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0709925964474678, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06710220128297806, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04610833153128624, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04026716202497482, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03911858797073364, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.038843341171741486, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02325698547065258, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0216247346252203, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021557064726948738, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02003173530101776, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019876373931765556, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013334023766219616, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015090607106685638, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012973635457456112, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012208281084895134, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013334023766219616, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.012208281084895134, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18530155718326569, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17272906005382538, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1687481850385666, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15236137807369232, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08838696032762527, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0837295800447464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09902562946081161, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09116801619529724, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08980742841959, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07872822135686874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07448254525661469, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05118094012141228, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04460296034812927, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.043351516127586365, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.043069738894701004, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025888854637742043, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02397337183356285, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023893438279628754, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022206077352166176, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022034209221601486, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014951740391552448, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016707580536603928, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014575096778571606, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.013513052836060524, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014951740391552448, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.013513052836060524, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.38.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.19934329390525818, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.17138536274433136, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.16046403348445892, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1423504799604416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.09137611836194992, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.08015372604131699, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.11383030563592911, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.10202890634536743, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.09606504440307617, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.07712244242429733, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.07319275289773941, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.05847075209021568, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.04995812475681305, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.04505278915166855, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04388279840350151, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.030014067888259888, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02521810308098793, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.024918705224990845, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.022638846188783646, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02186177857220173, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01780286617577076, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018318021669983864, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016150852665305138, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014506933279335499, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01780286617577076, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014506933279335499, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.125716894865036, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11687266081571579, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11296794563531876, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10149975121021271, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05914955213665962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.055353790521621704, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06878796964883804, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06307332962751389, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06014557555317879, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05250571295619011, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04970180243253708, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.035031046718358994, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03019840456545353, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02842334471642971, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.027990087866783142, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017537422478199005, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014650623314082623, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014466452412307262, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013295218348503113, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013021922670304775, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009254979901015759, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009058618918061256, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008687660098075867, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005925104953348637, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017537422478199005, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005925104953348637, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09891419857740402, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09190469235181808, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08856520801782608, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07951550930738449, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0464756116271019, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04334348812699318, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05423670634627342, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04995645210146904, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04726037755608559, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04121088609099388, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03896801918745041, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02761640027165413, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02393576316535473, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02233888767659664, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02195057086646557, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01383962668478489, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011576426215469837, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011404327116906643, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010513640008866787, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010267052799463272, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007357606198638678, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007272433489561081, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00685123959556222, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004848215728998184, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01383962668478489, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004848215728998184, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25942501425743103, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24172623455524445, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23601849377155304, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21233665943145752, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12282142788171768, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11611422151327133, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13945068418979645, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12709736824035645, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12479729950428009, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1090870276093483, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.103215292096138, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07139957696199417, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06108997017145157, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05904913693666458, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.058530714362859726, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.036061789840459824, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.03045072592794895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.030267486348748207, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027638627216219902, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027312977239489555, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01892269030213356, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018413420766592026, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018157055601477623, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01199918519705534, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018157055601477623, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01199918519705534, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.14696936309337616, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11111810058355331, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09683462232351303, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08200064301490784, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06413347274065018, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04759121313691139, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08601497858762741, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07496728003025055, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07018009573221207, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04700866714119911, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.045977674424648285, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04452973231673241, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03774464875459671, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.033063855022192, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03186867758631706, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.023482365533709526, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02019956335425377, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01993408612906933, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01753334514796734, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.016859596595168114, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014683338813483715, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016324928030371666, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013434222899377346, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014162513427436352, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01753334514796734, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013434222899377346, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.14976629614830017, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1394997388124466, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1362268477678299, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.12273235619068146, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07081630825996399, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.06703454256057739, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07923053205013275, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07313583046197891, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0719616562128067, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.06293854862451553, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.05941925570368767, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04044283553957939, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03505687788128853, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.034004341810941696, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03375745564699173, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02016618847846985, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.017628874629735947, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.01755518652498722, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.016037052497267723, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.015878858044743538, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.010692018084228039, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.010865295305848122, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.010341767221689224, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.0074118077754974365, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.017628874629735947, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.0074118077754974365, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1491340845823288, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.13893884420394897, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.13565079867839813, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.12230104207992554, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07101122289896011, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.06721553951501846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07966198772192001, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07332129031419754, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07215409725904465, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.06314708292484283, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.05977894738316536, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.041097674518823624, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03561856597661972, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03457813337445259, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0343378409743309, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.020675696432590485, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.018723569810390472, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.018655551597476006, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.017244497314095497, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01709871180355549, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011649003252387047, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012613571248948574, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011325611732900143, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009808718226850033, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.017244497314095497, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009808718226850033, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.39.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.10034426301717758, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.08578943461179733, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.07922631502151489, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.07036907970905304, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.04582477733492851, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.03961125761270523, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.05926774814724922, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.05244529992341995, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.04822296276688576, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.03871062397956848, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.03701876103878021, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.030417650938034058, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.02608519233763218, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.02317698858678341, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.02240060456097126, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.01582014374434948, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.013718873262405396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.013537019491195679, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.012550313957035542, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.012132114730775356, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.009524013847112656, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.010755053721368313, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.00859740935266018, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.009138758294284344, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.01582014374434948, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.00859740935266018, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + } + ], + "last_module_idx": 82, + "base_perplexity": 5.211368508453587 +} \ No newline at end of file