diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,78047 @@ +{ + "last_module_idx": 82, + "measurement": { + "lm_head.linear": null, + "model.layers.0.mlp": [ + { + "accuracy": 0.9120529388126574, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9191886374824926, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.923873550013492, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.924250558802956, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804354043383348, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865219310710305, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878323078155518, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906760046356603, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940403856729206, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916873942864569, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947904187597727, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971651956439018, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974056499961176, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981925491439668, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985099367209171, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989039488136768, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993500335043982, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.0.self_attn": [ + { + "accuracy": 0.8917310112401059, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.907581279152318, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9182003987462897, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9436478552065397, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9497632039220709, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9517694680314315, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9644961168891505, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9656833378892196, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.969817832896584, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717077393280832, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764295251745927, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781478235596105, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796723409702903, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814330872736479, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886138627403661, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905072976099817, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909107465493051, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940290796129327, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975078896080193, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.1.mlp": [ + { + "accuracy": 0.9795503835929068, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800761326363212, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834093435814506, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984509383377276, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896884137078336, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905049793030086, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920138293191006, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946184491640643, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951227793568059, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947649275785998, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954077962197756, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973321062953848, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977118912103929, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985519427021867, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986414394684529, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989430766160551, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996087162657395, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.1.self_attn": [ + { + "accuracy": 0.9795904645794317, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980427484763296, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834867700150138, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98685364738891, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877019312820936, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886526928136223, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893864095211029, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990412346626583, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928118508113059, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929435425683072, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943249919696858, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952892649330591, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946651360706279, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957688352779338, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966288942255472, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978421279474309, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969349095695897, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990836089771045, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992152866662333, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.10.mlp": [ + { + "accuracy": 0.9586201843462492, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9599149101658871, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9668192988947818, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9688664486533717, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790585433181963, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808339884406642, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836934616691188, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889968586595435, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899577082772004, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892530558924926, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906330092957145, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944935150836643, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995312927977035, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969650138365594, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971834554484016, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978279587077467, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991882733118377, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.10.self_attn": [ + { + "accuracy": 0.9693804289165296, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9705122677903426, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9731054243288542, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9807115338350597, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841852392020979, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984497136191318, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899115060505114, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903241835142437, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911632569212663, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915683920446196, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917895362565392, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927322433183068, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932025725904264, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936245381832123, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962075976164717, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967702162501059, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974623344054347, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981065947366389, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993068668501157, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.11.mlp": [ + { + "accuracy": 0.9580708648029127, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9593760998625505, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9662130569156847, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9681734379969145, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9788222752119365, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9807027076420031, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983451921688883, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889056235551834, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899035323607294, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891246863101658, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905547319274199, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944124092396937, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995266784570719, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969189316034317, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971285380030933, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977469573679724, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991753938558855, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.11.self_attn": [ + { + "accuracy": 0.9701809506667288, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9711287962762933, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9735704440819589, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800852866549241, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841900797266709, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845750033855438, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894215205782338, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899950090207552, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908298457923689, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907682326279188, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922779837721273, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927316198223516, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930152038210317, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936142803022736, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960614834961138, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967211014346072, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971728726829353, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979821537670336, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999278868499555, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.12.mlp": [ + { + "accuracy": 0.9590829422599391, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9603064091582048, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9667259799806696, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9686390883044192, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9792670027205819, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810673265080703, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837574896059538, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891219531234942, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900697399126855, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893656294596823, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907359300475371, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945389734287011, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995356982083697, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969880943627734, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972046333316126, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977953077146882, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991744602785299, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.12.self_attn": [ + { + "accuracy": 0.9710316156086168, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9719274859679373, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745332912394875, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812650037439246, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845930212422421, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851089684586776, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893632978200912, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900036869864715, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912090309356388, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919948170059606, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925160447233602, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930901645045531, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934187217762596, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938879585579822, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996221979197703, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969542820594812, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972109192688214, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981112782108156, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992886596408329, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.13.mlp": [ + { + "accuracy": 0.9568339084324083, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9582300688091078, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9648797355200115, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9668591053862321, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782212517763439, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800965346788105, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829292862038863, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885593488028175, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895515198770323, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888279163523724, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990261255910522, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942577536168852, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951133610386598, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968253138818239, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970532457687353, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997666599719148, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991312813209859, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.13.self_attn": [ + { + "accuracy": 0.9678030076779818, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9686944484710693, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9708542510082847, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9767953675044211, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9822993482414045, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824460054698744, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868489394062444, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869990725266305, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882367849349976, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909243638578215, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915962878026461, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925097540805214, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926658162945196, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935166529918972, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957400779975089, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966561233526782, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968548146517653, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979963975125238, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990663809799835, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.14.mlp": [ + { + "accuracy": 0.9555161501231947, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9569116328891955, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9638753156912954, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9659868572887621, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9774553964012548, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9793869570681923, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823351690643712, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881135233138737, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891030270802347, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883886105135867, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898762906852522, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940396970824191, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949353839221754, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966842708619017, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969442974972097, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975789973610326, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990822335607127, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.14.self_attn": [ + { + "accuracy": 0.9670905878669337, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9679275813855623, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9701400399208069, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9753177118928809, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827338692389036, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830219196645837, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866313055941933, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868395124611101, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877498314568871, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886803399575385, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912889921351483, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918103414146524, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920429869701988, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924602845781728, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957861617991799, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963573887944221, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967102608398387, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979358219394558, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990382065114222, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.15.mlp": [ + { + "accuracy": 0.9542913656485708, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9557831350125765, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9632398705733449, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9655391166084691, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9767908155918121, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787829628116206, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819561368540713, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877053789402309, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887838591086237, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881015035666918, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896209287016016, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939063818831193, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948071253142858, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966110234197817, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968901970668843, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975816505519968, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990878843359257, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.15.self_attn": [ + { + "accuracy": 0.9595691185248525, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9609493299534446, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9631285604677702, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.968805658189874, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815208206051275, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818927551570692, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864602198726252, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869593750489386, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987566918918961, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882421140608034, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899379595329887, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908068297724975, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990931145454708, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916575166739916, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994540282770207, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951062708308822, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954713802588614, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966960361129359, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989517942855233, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.16.mlp": [ + { + "accuracy": 0.9524991637782046, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9540831509389376, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9618686061156423, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9643348298574749, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.975823334957424, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778952692684374, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.981242121834504, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871093936656651, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882499356018869, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875994654078233, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891891871628008, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936480616268358, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945879217825437, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996441062343748, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996762988990859, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975037257138052, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990409012687834, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.16.self_attn": [ + { + "accuracy": 0.9589172287991172, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9600719025260523, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9613883432589079, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9669320144151387, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803532471782282, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980860206641649, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860950802501879, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866545921877811, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883006450377012, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.988730002390711, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903650315184342, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910886334745508, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913826553445113, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921124467724248, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952442708768343, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958846208296324, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963541493604058, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975724275174894, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990128772823434, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.17.mlp": [ + { + "accuracy": 0.94839839872561, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9502078733946148, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9586968861128154, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9614832903209486, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9737622816311685, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760493479276958, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796776912714306, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858668540653429, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871715958181181, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865571338879434, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883021265268326, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931136722627439, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941442491192567, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996098025848991, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964993141199413, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973129777139739, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989402361802364, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.17.self_attn": [ + { + "accuracy": 0.9589405028443587, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9604438731544896, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9642294802163777, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725920150154516, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9788786549317209, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791854384698366, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860009033428995, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863390514725133, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986998787051753, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875295162200928, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888095079283965, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898804190911745, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903626112561477, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911544832744097, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946565235915937, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955431034690455, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960178399556562, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973799343171873, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988938371993994, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.18.mlp": [ + { + "accuracy": 0.9465869916112799, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9484542514148512, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9573310299923545, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9602620883991844, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9729353346322712, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9752374824724699, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790633477662739, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854674621632225, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867769526807886, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861400786199068, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879213681346491, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929029353355107, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939540518741858, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995979988261273, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963911826673307, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972263341279406, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988905439447415, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.18.self_attn": [ + { + "accuracy": 0.9590376144961307, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9608305284851476, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9644758858178791, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.972642647592645, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782375552152333, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790177376646745, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984330908248299, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852936895270097, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878197790760743, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886860431809175, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891517460346222, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905432745030052, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901779780262395, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916385200462843, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942627964835418, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959056604849664, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953707919309014, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976591444329211, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986934183459533, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.19.mlp": [ + { + "accuracy": 0.9462773988121435, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9481530283626757, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9569295174197147, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9598458660276312, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.972786241456082, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9751304842923817, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789239183852547, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854252212925961, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867185275805624, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860531891647139, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878853951629839, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928582044024217, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939379339155398, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959490244325838, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963778304426294, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971857004259762, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988927336311654, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.19.self_attn": [ + { + "accuracy": 0.9581998806250722, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9593838829743235, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9632663256243655, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9702713771870262, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770010380368483, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9786009584602556, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821598137679853, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840445267526727, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874028100779182, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879540534395921, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888218837349039, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907006440978301, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989669144153595, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916500841316424, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935788723983263, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956750924649992, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994472073096978, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974871232713524, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985953351776851, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.2.mlp": [ + { + "accuracy": 0.9741589144656533, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9746250447474027, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9672250308488545, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9676104344819721, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813911020755768, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913545352847952, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924258276035911, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897098964766452, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941472211166432, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992933379976373, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954796233459523, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959633311158732, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939983436151555, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963715139188265, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981154975922484, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982577809377721, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998417970479319, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.2.self_attn": [ + { + "accuracy": 0.9646184413056624, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9657763838768005, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9748114974875199, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782079222955202, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798188727152976, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828378432675412, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816957288666776, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849752642606434, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900434456373516, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904077586374784, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911457017848366, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936535644688105, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916788927818599, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941177952446436, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947978505178502, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970232246345595, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950969932894957, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986343321047331, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987243535487276, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.20.mlp": [ + { + "accuracy": 0.9461619477523, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.947939395904541, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.956803626135776, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9597350264850416, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9728124737739563, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9750677049160004, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789231952868009, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985506816914207, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868130636842627, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861327566598591, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878847583344108, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929182670618358, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939444935635516, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960381306315723, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964012084038634, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972027946067484, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998906061249344, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.20.self_attn": [ + { + "accuracy": 0.9641137750525224, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9648022808526692, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9684580250790245, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9749044274028978, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797518363124446, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.981199070027, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984020532746064, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858321798475165, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892154179121319, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891003636937392, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905732846573779, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917246848344803, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912660443469098, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925491754945955, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949248201752964, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963219706949434, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958370288735942, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978933044170079, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998779487256941, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.21.mlp": [ + { + "accuracy": 0.9448394775390625, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9465732699946353, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9554874238214994, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9584580440270274, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9720579103419655, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743495254140151, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782718043578299, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851486494666651, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986486637278607, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857453217631892, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875451365583822, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927361207573038, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937808129348253, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959581353162464, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963080149732138, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971285246704754, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988910098020968, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.21.self_attn": [ + { + "accuracy": 0.9668700036249662, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.968083425572044, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9708534134061713, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772143332581771, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828941680883106, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833263143112785, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876570521216643, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882300605899409, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901010386253658, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908254593610764, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908941152848696, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992254345824844, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917087076525939, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934473971002981, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957121728282226, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966751635074615, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966660624271945, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980308399780801, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990689354507547, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.22.mlp": [ + { + "accuracy": 0.941672965099937, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9435294113661113, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9528940037677163, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9560073112186632, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.970350698420876, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9728539491954603, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9769818767120964, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841982565428081, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856404928784621, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848060403999529, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867834401758093, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922058417608863, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933862105796212, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956257923653251, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960203049214262, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968742522362032, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987788495460623, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.22.self_attn": [ + { + "accuracy": 0.9646712071017215, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.965913973356548, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9698113485386497, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9757946964941526, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812766031215066, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824399320702804, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855249053553531, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868652397080472, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.988632835839924, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891275018453598, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905590587540677, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992018527106235, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914028307324961, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929749722543516, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949601018115094, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963468023036656, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957005263943421, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979220887548045, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998816082548154, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.23.mlp": [ + { + "accuracy": 0.9391818862212331, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9411285300003855, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9507293732542741, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9539145168505216, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9690654936589693, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9716865734050149, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9759389805166345, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983463034818047, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984977701776906, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841765024160084, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862256708898043, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991890098703535, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931024069848814, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954296711244082, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995869760450564, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967549274626532, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987353376652065, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.23.self_attn": [ + { + "accuracy": 0.9686638430545205, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694345718935916, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9728512356155797, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789325610587472, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832553314535242, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846199123482955, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873999694460317, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891766215625563, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905378151881067, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908181766146108, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920847933543356, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927640965110377, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928335437649175, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935889902867769, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955871058137793, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967320120256198, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963947875719321, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998163099743818, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989736327215245, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.24.mlp": [ + { + "accuracy": 0.9405294970462197, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9422792133532072, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9514701021345038, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9544573269392315, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9697991264493842, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.97224003390262, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9763291086021223, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984023453373658, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854495305764047, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846039492832986, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865287322747079, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921607500628421, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932780548145896, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956589686243158, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960173446881143, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968471419262258, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988158994207257, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.24.self_attn": [ + { + "accuracy": 0.9658760334316053, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9672617441729495, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9702811680342022, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9759691326241744, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810991506827506, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983012285671736, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856888777331302, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881138558450498, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891660919314936, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899258323405918, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913887969757381, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921135306358337, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922477273564589, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931491056555196, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955603641114736, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99654500029589, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964311267984541, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980703439367445, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990213770223292, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.25.mlp": [ + { + "accuracy": 0.940871094402514, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9425882916701467, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.951342196840989, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9541229480191281, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9699913953479967, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.97249142119759, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.976383963697835, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984158478285137, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98561261829577, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847198724746704, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866320130072141, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922179720903698, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933241684185831, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956947000403154, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960420041492111, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968202723876426, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988028322787661, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.25.self_attn": [ + { + "accuracy": 0.9701378063151711, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717512287591633, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9753530778382954, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808171595397749, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833152701980189, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850897961541226, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870929772916593, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891950456719649, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906313842848727, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916425496339798, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927819881000017, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933683111479408, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935156322623554, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941701740026474, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959958858395878, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968592010830578, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967019801077089, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982199437524143, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991263555068719, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.26.mlp": [ + { + "accuracy": 0.9414061998066149, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9430955083746659, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9515364421041388, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9542352808149237, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9702033369164718, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725954407139829, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9763603084965756, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843364232464841, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857263282725686, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984819272631093, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867104197803297, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992274566700584, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933724599449258, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957562081123653, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960713335557988, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968150299238531, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988427523915705, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.26.self_attn": [ + { + "accuracy": 0.9735901575339468, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9746775862417723, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770927366457487, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818193034121865, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849712033020822, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863960335129186, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886680457152819, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904306946616424, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991555675079948, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921127730294278, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929548337271339, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935379451826999, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936579948193148, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943417444040901, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962397911830952, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972078698246103, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969074892762461, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985235776556166, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991399730114561, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.27.mlp": [ + { + "accuracy": 0.9405552462527627, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9422347796590704, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9504849534285695, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9531281872799522, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9697188201703524, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9721675201466209, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758494778683311, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841247546045404, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855290682692277, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845857730037287, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865051727545889, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921548860637766, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932698442747718, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956952631473541, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960074322788339, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967298101829855, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988186674094514, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.27.self_attn": [ + { + "accuracy": 0.9741319292470029, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9755225479602814, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9780831274233366, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815421245600048, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847280241941151, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865598913870359, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876773333863208, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902614668795937, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919849218506562, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922091364860535, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930871246676696, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938708794744391, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940145878415358, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948338501547512, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962374320939967, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973757621881209, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968115416796584, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986170651881319, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991587825903767, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.28.mlp": [ + { + "accuracy": 0.9393402526253148, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9410361051559448, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9493237420132286, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9519771180654827, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9690912648251182, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9715763393201327, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9752675608584755, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837554865761807, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852237858270344, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842710730276609, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986223882750461, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919996018472471, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931274526997617, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956030437820836, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959369286110527, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966671894255438, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987919148253767, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.28.self_attn": [ + { + "accuracy": 0.9692406497503582, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9708345720642492, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9739426374435425, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9792196844753466, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832273376615424, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845005148335507, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869755247705861, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884730224546633, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903430279932524, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990897777049165, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919982066279963, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929675687300531, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926950037479401, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993541472052273, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99564746061438, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966890964853136, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963700849758951, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981341183577713, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989821969678527, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.29.mlp": [ + { + "accuracy": 0.9404481461173609, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9420995273088154, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9500046246930173, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.952533012942264, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9695931296599539, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9720968070783114, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9756091880170923, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840719637117887, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855145843405473, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845257997512817, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864661724943864, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921175547336277, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932525142243034, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956845186258617, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959795420107088, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996663046510596, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988110300741697, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.29.self_attn": [ + { + "accuracy": 0.9715037534111425, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9737565486054671, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9766071407418502, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812315278931668, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849055785881845, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863429242058804, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881433970049808, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898982346057892, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910465510267961, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916794300079346, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927368015050888, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937818234688357, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936049541360453, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944625814494333, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960962581007105, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972272195706242, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966377478681112, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984076011337732, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991102790165889, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.3.mlp": [ + { + "accuracy": 0.981740780566868, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9822663382480019, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853066425574454, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986264202155565, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907602965831757, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915043664605994, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928495264367053, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951771846727321, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956207785167193, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952909240597173, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958804304662504, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976002495539816, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979463331401348, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986958742925995, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987758635298202, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990565595462134, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999654470869389, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.3.self_attn": [ + { + "accuracy": 0.9888342532672381, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893960066531834, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911319244849054, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933723375985497, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942439359269644, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943173684571919, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995643920804325, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957242000259852, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963278546929359, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965106542957457, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972090040774721, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974116853585369, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974972119456843, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977150592756899, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986149686339655, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988692396957624, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998911679085148, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994107184343433, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997029912560002, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.30.mlp": [ + { + "accuracy": 0.9396271705627441, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9413617033707469, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9490943456950941, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9516040180858812, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9692509676280775, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717501088192588, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9751703158805245, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838352015143946, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852625335517683, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98435062797446, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862975823251825, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920449821572555, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931709444836566, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956220717806565, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959494753887779, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966221071387592, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987838225145089, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.30.self_attn": [ + { + "accuracy": 0.9667385032302455, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725132710055301, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768114168392984, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795741768259751, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844756534225062, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872834800105346, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867940247058868, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897896780779487, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915145180727306, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917789619219931, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935267559791866, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939868073714407, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939513108447978, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949342977059515, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967327308105794, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973347367424714, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972458854317665, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998569085017631, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991971481297361, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.31.mlp": [ + { + "accuracy": 0.9386836729551616, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.940448986856561, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9482315496394509, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9508594494116933, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9687093308097438, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9712589822317425, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747550973766729, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835130650746194, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849932036901775, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840773845973768, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860624608240629, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919014949547617, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930502819387537, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955286575775397, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958789101556728, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996572439607821, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987582826104603, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.31.self_attn": [ + { + "accuracy": 0.9706535590322394, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.971916841833215, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9753407726162359, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9802370338063491, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838710348857077, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985148465947101, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877074768668727, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892291222748003, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904889354580327, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910721261250345, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924280792474747, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993188465112134, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932238749767605, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940440588091549, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996054256040799, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969388615143927, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967709634649126, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983287077201041, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990776063580262, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.32.mlp": [ + { + "accuracy": 0.9381078795382851, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9399199234811884, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9476236010852613, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9502744047265304, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9683981757414968, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709749692364743, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9744653576298764, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832617515011838, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847716312659415, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983919272297307, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859184434539393, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918199016859657, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929768050971784, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954633057901734, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958375292389017, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965330370162663, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998727327898929, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.32.self_attn": [ + { + "accuracy": 0.9742224467428107, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.975288998139532, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791773557662964, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826689613492865, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856405117009815, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870561180930388, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886995663768366, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904634058475494, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919879632560831, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923033792721597, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932960405161506, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941587612817162, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994131318048427, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994929847748656, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963234800257181, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974023182141153, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996774097806529, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985451700263902, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990526899499329, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.33.mlp": [ + { + "accuracy": 0.9350592086189672, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9370501731571398, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9452478760167172, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.948058197372838, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9668941842882257, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9695927720320853, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.97329643839284, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824439067589609, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840164341424641, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831555768063194, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852529262241564, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914309351067794, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926462996947137, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952400428684134, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956457097279398, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963836928731516, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986709896475077, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.33.self_attn": [ + { + "accuracy": 0.969009490389573, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9702833232126737, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9746425434162742, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794407251634096, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837509048612494, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846261253482417, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880636522644445, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891319596453717, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904390408804542, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906452384434248, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922102556416863, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927523136138916, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931847053138834, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937020285349143, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961006076712358, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967935155881079, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968540921415153, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982306100428104, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999170109727665, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.34.mlp": [ + { + "accuracy": 0.9341764450073242, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9361098942003752, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9444079399108887, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9472868568018863, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9663833913050199, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9691510231871354, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9728985083730597, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821546375751495, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837611568601508, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828659452890095, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850147288096579, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912731247512918, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925289232479898, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951576292514801, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955542428713096, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996310661497869, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998646873098455, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.34.self_attn": [ + { + "accuracy": 0.9564421365135595, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9634804819759569, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9670375177734777, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9729942397067421, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789152161071175, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805382882293902, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835406196744818, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855260582346665, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882311475904364, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888038917591697, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898756766005566, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913231142257389, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908160669238943, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923511304353413, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994463024562911, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961122086173609, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952292273703375, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976951340703588, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987169443384597, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.35.mlp": [ + { + "accuracy": 0.9313121908589413, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9334871894434879, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9418660590523168, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9448951294547633, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9649099299782201, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9678272699054918, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9716648051613256, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812323670638236, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829328781680057, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821220683424097, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843680905668359, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909031665638873, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921999652134744, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948959491754833, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953697998272745, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961527025229052, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985512493080214, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.35.self_attn": [ + { + "accuracy": 0.9670090236161885, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.970254731805701, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9739479202973215, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789752222989735, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982950569767701, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842907300120906, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867574556877738, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884398720766369, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896635068090338, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901157766580582, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918090748159509, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926922862467012, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926638391457105, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936671790323759, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956654616092381, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967378238706213, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996332765409821, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980602436944058, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989990249864364, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.36.mlp": [ + { + "accuracy": 0.9258465453198081, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9281125194148013, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9374217422384965, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9408036219446283, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9619908991612887, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9651511844835783, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694491436606959, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796112822858911, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814794832154324, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805625410456407, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983014608684339, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900747427814885, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915057522685904, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944454498196903, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949316511812963, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958471279395255, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984371451366889, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.36.self_attn": [ + { + "accuracy": 0.9576083045256765, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.961531871243527, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.966211265639255, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725723078376368, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789847762961137, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799127719904247, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842329542887839, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985140494610134, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871171556021038, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876823464506551, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892749849118685, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907578449500235, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905500200233961, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919212107595644, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944216287449786, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959359043522885, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952969719704828, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976547174155712, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987337177521304, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.37.mlp": [ + { + "accuracy": 0.9197051023182116, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9226812249735782, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9326293092024953, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9361098879262021, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9587310615338778, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9624839989762557, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9670872845147785, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778990808286165, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798377171943062, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9788021460959786, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816416956876454, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891134939695659, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907918039121126, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938229608692621, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944105273798892, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953712468084536, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982510097324848, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.37.self_attn": [ + { + "accuracy": 0.9511888717350206, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9545753974663584, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.96056530350133, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9701054911864432, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9754852166301325, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.976137997288453, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838919827812597, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846533552596444, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986660111891596, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870989056009996, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879146328097895, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887559970742777, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896771523513292, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905945250862523, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942842718017729, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952773863547727, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957939802031768, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973198030340044, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988353059283996, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.38.mlp": [ + { + "accuracy": 0.903786207500257, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9079556904341045, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9182301571494654, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9218543015028301, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9503825150038067, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9558806137034768, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9605097519723993, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9729712417251185, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.975299126223514, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743304283995378, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9780374592856357, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866851834874404, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888524499378706, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919227887141077, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993237106423629, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942481690331509, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977288626526531, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.38.self_attn": [ + { + "accuracy": 0.9558473103924802, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9604166746139526, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9650336503982544, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9713513161006727, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777860374827134, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784937893089495, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832271635532379, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840288946503087, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860397075351915, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869508382521177, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889419518019024, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896916237316633, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901790987504157, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906491637229919, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994303428420895, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953330018018421, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953164101431244, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974843767520628, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987293963173502, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.39.mlp": [ + { + "accuracy": 0.884964340611508, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8898815732253225, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9001276304847315, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9048946844904047, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9410798424168637, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9466547024877447, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9512439872089186, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9675213914168508, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9705433092619243, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9696893064599288, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9738192495546842, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843499299726988, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867396056652069, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990670371996729, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991942895086188, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931395579325525, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969983855752569, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.39.self_attn": [ + { + "accuracy": 0.9530802808309856, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9535483372838873, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.96006861485933, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9676298373623898, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791747629642487, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797391954221224, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851138560395492, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857763146099291, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844428928274858, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882658014172002, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989532313064525, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899004870339444, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903670502336401, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915840343425149, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946700499245995, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952122024015376, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995787958957647, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972780339027706, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987199521182399, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.4.mlp": [ + { + "accuracy": 0.976619828688471, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.977325505331943, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813207340867895, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825559697653118, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881810381224281, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891381091193149, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908692656378997, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938141084031055, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943785251755464, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939748299749274, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947282997401137, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969287508804547, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973723698211344, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983275026867264, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984265314905267, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987912532922468, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995408535591865, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.4.self_attn": [ + { + "accuracy": 0.9850203865452817, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856398811465815, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878977027378584, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906079745606372, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991500859982089, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925714590047535, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931935048417041, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944382651071799, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953042355022932, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953812960731355, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961569309234619, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963687987704026, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965373171787513, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967289823842677, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979427721547453, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984675218400202, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982599123920265, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992528140152755, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999510843208746, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.5.mlp": [ + { + "accuracy": 0.9712720481972945, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9721320679313258, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770509004592896, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785634922353845, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854518630002674, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866324647476798, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887599192167583, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923665037280635, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930698024599176, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925851798371265, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935133119947032, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962227148445029, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967673669912314, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979394674301147, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980750536840213, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985234258010199, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994525038392136, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.5.self_attn": [ + { + "accuracy": 0.985112936873185, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856448989165457, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875965792881815, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907490171884236, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921784816603911, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924286352960687, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941961184928292, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944547359880648, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954753000485269, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956641259946322, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963084494597033, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965561875387242, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966562955003035, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970121944421216, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981288374646714, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984411653131247, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985524725757147, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991165496605007, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996113682067708, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.6.mlp": [ + { + "accuracy": 0.9654162181051154, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.966520629431072, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725514368007058, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743659449251074, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825889483878487, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840039030501717, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865739612202895, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909014372449172, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917139112949371, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911253632683503, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922383796227606, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995479689616906, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961320181426249, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975362418121413, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976938043377901, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982296963663477, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993475756951069, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.6.self_attn": [ + { + "accuracy": 0.980349998725088, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811813988183674, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831307797055495, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876398263793242, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990034240641092, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902895461571845, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931636389933134, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934716448187828, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938244407898501, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941672561200041, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949610080373915, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953375768504644, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955592406423468, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959438956881824, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975572689191291, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997878182287279, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982060407729525, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987686797976494, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995210350147987, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.7.mlp": [ + { + "accuracy": 0.9623034345476251, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9634735019583451, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9699289359544453, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9718472455677233, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810375979072169, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825705038873773, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852849056846217, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901141085122761, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909696241742686, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903205311612079, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915288075020439, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950600838974902, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995778251243265, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997311900320806, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974763497318092, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980527221372253, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992882232426813, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.7.self_attn": [ + { + "accuracy": 0.9757609665393829, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768081432894656, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.979181590833162, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984820157289505, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876082053309992, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878436475992203, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918321355393058, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921410405322125, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929545137443041, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932903553310194, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939029773599223, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943824422202612, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947479308435792, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995179337106253, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970138994486708, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975681928427595, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997857037343477, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998659337037488, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994116492177311, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.8.mlp": [ + { + "accuracy": 0.960069759895927, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9612650996760318, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9680225158992567, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9700486754116259, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798129263677096, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814630605672535, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843260332157737, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894492202683499, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903601478589209, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896807976459202, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909736663103104, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947286393297347, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954941609972402, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971193028123755, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973101317882538, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979240300231859, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992358041927218, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.8.self_attn": [ + { + "accuracy": 0.9743400090619138, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9755802593733135, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9780785362971457, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840649789885471, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865981748229579, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869472407980969, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909443565105137, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913867680650008, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925768053845355, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929729205997366, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934218541571969, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939911141991615, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942523370447912, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994760647023979, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968275694470656, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973559024694719, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977823333128503, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985071756926022, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994019334645647, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.9.mlp": [ + { + "accuracy": 0.9597906909490886, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492374592, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9610492963539927, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 510724672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9677206842522872, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 569864704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9697240842016119, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 639496704, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794060180061742, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 721045344, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812799896064558, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 740855808, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840783555256692, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 796587104, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989217957383708, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 910810592, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99009329237436, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 924225536, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894380773368635, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 937477984, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908523410558701, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 957288448, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946709188975786, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1153910624, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954231024572724, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1173721088, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970566555857658, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1336788832, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972734561091975, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1380525056, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978815089715155, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1505043456, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991837522309077, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1769284608, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.9.self_attn": [ + { + "accuracy": 0.9694139706461053, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 111655168, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9706405338488127, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 114997504, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9734053454901043, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 119288192, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980695073541842, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 139930496, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838730065446151, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165321856, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841944794905814, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 165487616, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892322719097137, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 211983488, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896934887296275, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 212149248, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907958742819334, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 213960704, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915260790210021, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 216920576, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919128661092959, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 217916416, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926596817217375, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 219400192, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929266070064745, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 223787264, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993680255977731, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 226914816, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959792200671999, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 274898048, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968277210075605, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 279343616, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970611753432375, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 316841088, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998286588803718, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 332263936, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999205489838986, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 421698688, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.norm.norm": null + } +} \ No newline at end of file