diff --git "a/job_new.json" "b/job_new.json" --- "a/job_new.json" +++ "b/job_new.json" @@ -1,6 +1,6 @@ { - "in_dir": "base_model", - "out_dir": "HuggingFaceTB/SmolLM2-135M-Instruct-EXL2", + "in_dir": "/content/SmolLM2-135M-Instruct", + "out_dir": "SmolLM2-135M-Instruct-EXL2", "cal_dataset": "wikitext-test.parquet", "bits": 4.5, "dataset_rows": 100, @@ -13,61824 +13,6 @@ "rope_scale": null, "rope_alpha": null, "output_measurement": null, - "progress": "finished", - "status_output": false, - "cal_filename": "HuggingFaceTB/SmolLM2-135M-Instruct-EXL2/cal_data.safetensors", - "last_module_idx": 62, - "measurement": { - "model.layers.0.self_attn": [ - { - "accuracy": 0.9653622061014175, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9656507521867752, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736396297812462, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9789042361080647, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871607413515449, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864753047004342, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902706798166037, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904940519481897, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912318736314774, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919828437268734, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940058016218245, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942272333428264, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944152282550931, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951103604398668, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971425505355, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974891408346593, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976299838162959, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982806678162888, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993022565613501, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.0.mlp": [ - { - "accuracy": 0.7897239923477173, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.7996353209018707, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8281585574150085, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8371204733848572, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9048747718334198, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9075905606150627, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.91960858553648, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9477423280477524, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9517769888043404, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9520139917731285, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9558322839438915, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9757880140095949, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.978103106841445, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857291495427489, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873036863282323, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898060876876116, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960855767130852, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.1.self_attn": [ - { - "accuracy": 0.9404392912983894, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9426095299422741, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.946966428309679, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9634836502373219, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9719871487468481, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9715181961655617, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818352330476046, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9820242691785097, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9831722434610128, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9842823408544064, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855817724019289, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9853526772931218, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864199310541153, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875497296452522, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932851265184581, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937345124781132, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952340750023723, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966350719332695, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987135612173006, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.1.mlp": [ - { - "accuracy": 0.9016557484865189, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9047198370099068, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9158541187644005, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9194309040904045, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9528538882732391, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9546095207333565, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9600068144500256, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9740737527608871, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9762449711561203, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9758813213557005, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778902214020491, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877292113378644, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889425057917833, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929740149527788, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936254750937223, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947596765123308, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980985887814313, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.2.self_attn": [ - { - "accuracy": 0.9496447183191776, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9513713791966438, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9555250555276871, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9634813405573368, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9755331091582775, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754074569791555, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9840496405959129, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9841677621006966, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9850920457392931, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9859835682436824, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876130288466811, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876447143033147, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988743850030005, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897214015945792, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942039451561868, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948350451886654, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957496467977762, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971585541497916, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998831557109952, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.2.mlp": [ - { - "accuracy": 0.9077477902173996, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9114615768194199, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9211274981498718, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9243806675076485, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9561802186071873, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.957623802125454, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9626831226050854, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754375219345093, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9770376030355692, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9772618357092142, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9787111058831215, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883524794131517, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897069204598665, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931414322927594, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939953191205859, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950649430975318, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980990078765899, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.3.self_attn": [ - { - "accuracy": 0.9309663400053978, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9330538138747215, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9405779242515564, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9480898678302765, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9640389680862427, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9630477577447891, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9698454160243273, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9695344883948565, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9732844661921263, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9751601088792086, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.979167029261589, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9815531522035599, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9807593896985054, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.982931861653924, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855623058974743, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880614895373583, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9861280480399728, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954097527079284, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960047528147697, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.3.mlp": [ - { - "accuracy": 0.9058115407824516, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.908439539372921, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9191426485776901, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9226225987076759, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9532247260212898, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.954898364841938, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9601674266159534, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9743824042379856, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9765927102416754, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9762116931378841, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9780857916921377, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879447687417269, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891038266941905, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931294987909496, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937500776723027, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948580302298069, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981851739576086, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.4.self_attn": [ - { - "accuracy": 0.9373606294393539, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9397291727364063, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9431961290538311, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9504978023469448, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9632214047014713, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9640652947127819, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9691083952784538, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9702157527208328, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9710072427988052, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9726806003600359, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9804122410714626, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9805784542113543, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9810363464057446, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818874858319759, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876024024561048, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899542033672333, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883359391242266, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951419718563557, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965073238126934, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.4.mlp": [ - { - "accuracy": 0.8987627476453781, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9017839878797531, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9132393226027489, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9169850051403046, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9498889818787575, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9516111202538013, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9571423791348934, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9724189843982458, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9748211838304996, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974504055455327, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9764973446726799, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870818667113781, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9882942270487547, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925867123529315, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932925528846681, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944781307131052, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980386537499726, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.5.self_attn": [ - { - "accuracy": 0.9409872442483902, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9418883994221687, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9466007687151432, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9530569985508919, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9643559120595455, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9650397598743439, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9707599375396967, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9712178222835064, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9726341050118208, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.973127044737339, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9824034608900547, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984048493206501, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9837718661874533, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857744798064232, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990403207950294, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921023324131966, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913744162768126, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956685421057045, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975484577007592, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.5.mlp": [ - { - "accuracy": 0.898022435605526, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9010524451732635, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9123967587947845, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9162064045667648, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9497192651033401, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9514362663030624, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9569934904575348, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9722369071096182, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9745967444032431, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9744039326906204, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9763468410819769, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9869906650856137, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9881984870880842, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925231952220201, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932358912192285, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944012928754091, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980014832690358, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.6.self_attn": [ - { - "accuracy": 0.9483311027288437, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9488541074097157, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9522876851260662, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.958598829805851, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9693667069077492, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.969318725168705, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974604794755578, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9751371573656797, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9771039206534624, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9768935237079859, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9823072515428066, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9828989244997501, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9826217219233513, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9822991378605366, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877646928653121, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906566692516208, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883920270949602, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961583665572107, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970754899550229, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.6.mlp": [ - { - "accuracy": 0.8955627307295799, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8988570719957352, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9105149656534195, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9145743697881699, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9487475007772446, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9504835195839405, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9562518484890461, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9711200706660748, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736437536776066, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9737806208431721, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9758805111050606, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867040319368243, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880103236064315, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921366479247808, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931297092698514, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943689769133925, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979013849515468, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.7.self_attn": [ - { - "accuracy": 0.9363964647054672, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9374069273471832, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9415525421500206, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9472051709890366, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9650842025876045, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9667154960334301, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9709086660295725, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9722913894802332, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9771905858069658, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9773647580295801, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9819580670446157, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984059527516365, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9822870045900345, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9859487935900688, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898270759731531, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991921853274107, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904757384210825, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942045034840703, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972843201830983, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.7.mlp": [ - { - "accuracy": 0.8950067013502121, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8982527032494545, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9101148024201393, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9142752885818481, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9482137709856033, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9499858915805817, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9558534882962704, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9710542801767588, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9735917933285236, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736350458115339, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9756504762917757, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865649975836277, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878600733354688, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992167416960001, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930641311220825, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943427541293204, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978933548554778, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.8.self_attn": [ - { - "accuracy": 0.9321105852723122, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9332313686609268, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9374388456344604, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.942411370575428, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9616459161043167, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9636161029338837, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9673176109790802, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9692027531564236, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9724457878619432, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9723567962646484, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818043299019337, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9831144120544195, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9821334704756737, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9847136186435819, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875978222116828, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908908065408468, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883221024647355, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937662910670042, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997193539282307, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.8.mlp": [ - { - "accuracy": 0.895395427942276, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8987509682774544, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9108337387442589, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9150538444519043, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9484877251088619, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9504384137690067, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9564352482557297, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9709328953176737, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736583642661572, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736334998160601, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9757528156042099, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865987515076995, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879121361300349, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921118337661028, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930419293232262, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943749676458538, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997863968135789, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.9.self_attn": [ - { - "accuracy": 0.9340836927294731, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9348594695329666, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9383697286248207, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9448546506464481, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.968759061768651, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9698476642370224, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9753817953169346, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9766593910753727, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9777390044182539, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9781889785081148, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9842864163219929, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9841099884361029, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9840985238552094, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860007977113128, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924477431923151, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931063265539706, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938103095628321, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965387005358934, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982128986157477, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.9.mlp": [ - { - "accuracy": 0.894720584154129, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8980859592556953, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9105210602283478, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9149529486894608, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9477167539298534, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9497236609458923, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9557455107569695, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.970395028591156, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.973205866292119, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9731321763247252, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754748176783323, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9863047078251839, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877331163734198, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918897952884436, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929302120581269, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942748714238405, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978188876993954, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.10.self_attn": [ - { - "accuracy": 0.9321816340088844, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9338596165180206, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9390655420720577, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9511623494327068, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9654741585254669, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9647916443645954, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.976581260561943, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9759913049638271, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.977791478857398, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9766034781932831, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9820583071559668, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829634800553322, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9840381424874067, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858452072367072, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911183258518577, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927606964483857, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930176925845444, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961318694986403, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982708934694529, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.10.mlp": [ - { - "accuracy": 0.8966403603553772, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8998933658003807, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9119467735290527, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9161941632628441, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9487372674047947, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9508645348250866, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9567949883639812, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9710837714374065, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9737328458577394, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9735096972435713, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9760545901954174, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865744085982442, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880222668871284, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920114250853658, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930830551311374, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944096109829843, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978510767687112, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.11.self_attn": [ - { - "accuracy": 0.9312568008899689, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9332301467657089, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9405637681484222, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9579248949885368, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9649055600166321, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9648430123925209, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9790281932801008, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9795422255992889, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9813254233449697, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9824243802577257, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9821619410067797, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9828764609992504, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846223322674632, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860872607678175, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919869741424918, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930214849300683, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945889986120164, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963408834300935, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985773399239406, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.11.mlp": [ - { - "accuracy": 0.875226154923439, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8800085708498955, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8776943162083626, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.878037765622139, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9859139826148748, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9839992877095938, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9853359190747142, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899091525003314, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895061971619725, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929467467591166, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936166508123279, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961359316948801, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968130241613835, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962243584450334, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976192028261721, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979205501731485, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998629147768952, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.12.self_attn": [ - { - "accuracy": 0.9769604727625847, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9771853219717741, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9787843525409698, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9802410639822483, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9841982051730156, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846875499933958, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9851601226255298, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857962345704436, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855287540704012, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9838211126625538, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903182601556182, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98999316804111, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990129740908742, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902943260967731, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947103685699403, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962444126140326, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948436468839645, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978676775936037, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985022551845759, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.12.mlp": [ - { - "accuracy": 0.9781476985663176, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9788574893027544, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9815687015652657, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9825597815215588, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890716709196568, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989517685957253, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908511815592647, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938438842073083, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944167356006801, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944096454419196, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948785491287708, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971508923918009, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974430950824171, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983102028490975, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985161948716268, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988223715918139, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995472824084572, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.13.self_attn": [ - { - "accuracy": 0.9775123447179794, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9779304452240467, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9786181915551424, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9805374052375555, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9816293548792601, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9813365992158651, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9827069714665413, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9824518170207739, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883041996508837, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988620444200933, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891266189515591, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992383630014956, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912554249167442, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992874032817781, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948537521995604, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959320267662406, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950215620920062, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975081605371088, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987124754115939, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.13.mlp": [ - { - "accuracy": 0.9784092828631401, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9791967142373323, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9819807410240173, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829925429075956, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892863854765892, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897093400359154, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910867009311914, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939384125173092, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944840976968408, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944948703050613, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949611537158489, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971913432236761, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974777870811522, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983320919563994, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985309997573495, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988422538153827, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995508964930195, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.14.self_attn": [ - { - "accuracy": 0.9722938239574432, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9723927937448025, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9742725621908903, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9759753085672855, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9805382322520018, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817833118140697, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818325489759445, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834700748324394, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867758862674236, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9850446097552776, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885521586984396, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917449690401554, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899688297882676, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918535286560655, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936125460080802, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959560642018914, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938511936925352, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976573828607798, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984385870629922, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.14.mlp": [ - { - "accuracy": 0.9774959404021502, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9783083982765675, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9811211936175823, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9821432549506426, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886806700378656, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892165157943964, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905572487041354, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993577154353261, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942223429679871, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941836046054959, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947144570760429, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970176124479622, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973409380763769, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982353881932795, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984348592115566, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987539889989421, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995187728782184, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.15.self_attn": [ - { - "accuracy": 0.9723721873015165, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9723578151315451, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9763910453766584, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9781720526516438, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9835366141051054, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9847817411646247, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9848032016307116, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862431045621634, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879387719556689, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888578504323959, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9887736244127154, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902081154286861, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98978874925524, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902772186324, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943024734966457, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956261580809951, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945723917335272, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996630092151463, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998740533250384, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.15.mlp": [ - { - "accuracy": 0.976024204865098, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.976870708167553, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9795254543423653, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.980513159185648, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878587136045098, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885692838579416, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898539343848825, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930968354456127, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99380571115762, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993736450560391, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943760680034757, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967984706163406, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971887967549264, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980783136561513, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983266853960231, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986249717185274, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994762184796855, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.16.self_attn": [ - { - "accuracy": 0.9732388239353895, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.973774554207921, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9762641675770283, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9788840394467115, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.977979889139533, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9799960926175117, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9794833343476057, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9816496539860964, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865384884178638, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871418811380863, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913953989744186, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928999468684196, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923400641418993, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938350254669785, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950967528857291, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966306854039431, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952974757179618, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980227276682854, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990424872376025, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.16.mlp": [ - { - "accuracy": 0.9761743023991585, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9769750237464905, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9799803365021944, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9810711722820997, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988034313544631, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988614184781909, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900615978986025, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993232449516654, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938824004493654, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938532398082316, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944290709681809, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968685023486614, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972143981140107, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981403688434511, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983640406280756, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987043188884854, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999499257071875, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.17.self_attn": [ - { - "accuracy": 0.9669748805463314, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.967278603464365, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.96769854798913, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9698061812669039, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9750580601394176, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754506628960371, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.976877560839057, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9776033610105515, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9790981486439705, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9824137073010206, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892283137887716, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990932559594512, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908454418182373, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898010706529021, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938992918469012, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955897023901343, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994321804959327, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971313427668065, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985124521190301, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.17.mlp": [ - { - "accuracy": 0.974860068410635, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9757176451385021, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9788082204759121, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.979938356205821, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874293627217412, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879879523068666, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895431790500879, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992915294598788, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935739869251847, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935740288347006, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941605846397579, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967264039441943, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997085016220808, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980581399286166, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982934035360813, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986461247317493, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994760614354163, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.18.self_attn": [ - { - "accuracy": 0.9315434023737907, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9313673377037048, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9433160945773125, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9434045404195786, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9553163275122643, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9540189579129219, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9562645964324474, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9554661549627781, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9699995182454586, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9659288711845875, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9775503501296043, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9795052986592054, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.97765220887959, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9793077688664198, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878052901476622, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917034665122628, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879687186330557, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965921542607248, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971163047011942, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.18.mlp": [ - { - "accuracy": 0.9680065512657166, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9695253036916256, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9728687275201082, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9741323925554752, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9841342326253653, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984962971881032, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866522299125791, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910536343231797, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918090580031276, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918555468320847, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926641015335917, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995843056589365, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963288025464863, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974467759020627, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978316961787641, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998216473730281, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992877030163072, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.19.self_attn": [ - { - "accuracy": 0.9603110961616039, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9609277136623859, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9648818671703339, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9674716331064701, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9699026234447956, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9722255263477564, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9715757146477699, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9740222785621881, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9779431838542223, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9788480270653963, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9868513401597738, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885950069874525, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874283783137798, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9884060742333531, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925385406240821, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944804338738322, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928802661597729, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971248339861631, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980459667276591, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.19.mlp": [ - { - "accuracy": 0.9653163850307465, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9666154235601425, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9705671314150095, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9720665737986565, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9826719257980585, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9835127778351307, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854860184714198, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901278223842382, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910700246691704, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911152524873614, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919550213962793, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954845882020891, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959849901497364, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972838915418833, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976455580908805, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980974660720676, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992663748562336, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.20.self_attn": [ - { - "accuracy": 0.9561426229774952, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.95625339448452, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9635386988520622, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9659725837409496, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9746324848383665, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9776685833930969, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9782999139279127, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9821965955197811, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9831220656633377, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9845291851088405, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867265010252595, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885336775332689, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870483186095953, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898217963054776, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912590812891722, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949732795357704, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916610550135374, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976366953924298, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980734197888523, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.20.mlp": [ - { - "accuracy": 0.9543016888201237, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9563185311853886, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9611292742192745, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9630674123764038, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9773784186691046, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9784249886870384, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9808034747838974, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870169991627336, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9882188197225332, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883964294567704, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894273476675153, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940978940576315, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947226047515869, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963815102819353, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969187946990132, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974865547847003, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990122256567702, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.21.self_attn": [ - { - "accuracy": 0.9646971002221107, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9652721360325813, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9687629248946905, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9710514675825834, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9765988942235708, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9779532551765442, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9791927337646484, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9810482505708933, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9836639370769262, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9830789435654879, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873690977692604, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899369664490223, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886166863143444, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908327953889966, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993660245090723, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951620572246611, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940826804377139, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971952612977475, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981937985867262, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.21.mlp": [ - { - "accuracy": 0.9589180909097195, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9603573642671108, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9654729925096035, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9674650654196739, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9795741643756628, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9803628884255886, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829608183354139, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9882833734154701, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894305132329464, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895813502371311, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904451668262482, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946969379670918, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952350058592856, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967974829487503, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972423545550555, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978365157730877, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991303741117008, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.22.self_attn": [ - { - "accuracy": 0.9622990898787975, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9626810625195503, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9661745242774487, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9695497881621122, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9769308269023895, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.978640154004097, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9805964510887861, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9825196880847216, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9847725164145231, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9843764817342162, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880260303616524, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895689664408565, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888515574857593, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904462778940797, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923867816105485, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954934315755963, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928916674107313, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972705678083003, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985436694696546, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.22.mlp": [ - { - "accuracy": 0.9489712119102478, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9509580582380295, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9564667567610741, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9587768763303757, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9747653026133776, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9757122322916985, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9785071145743132, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9853634415194392, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867605278268456, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870984582230449, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9881759956479073, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934058208018541, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940739227458835, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959706687368453, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965630366932601, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972328415606171, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988899948075414, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.23.self_attn": [ - { - "accuracy": 0.9345800653100014, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9360465332865715, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.934687964618206, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.938193779438734, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9497336000204086, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9509040601551533, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9535694420337677, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.955462884157896, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9631003029644489, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9665821306407452, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974414998665452, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9779252298176289, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9763467777520418, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9794817380607128, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864036813378334, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891995070502162, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872314892709255, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944212981499732, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965831858571619, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.23.mlp": [ - { - "accuracy": 0.9445536509156227, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9470214322209358, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9534319341182709, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9559730142354965, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9729294553399086, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9737473279237747, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.977052815258503, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9841806776821613, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855679515749216, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862171187996864, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872222421690822, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992981483694166, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936264492571354, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956631208769977, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963738464284688, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971095169894397, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987903304863721, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.24.self_attn": [ - { - "accuracy": 0.9434301555156708, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9445074237883091, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9486190229654312, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.952824279665947, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9628100991249084, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9655949622392654, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9662239961326122, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9691413193941116, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9642912298440933, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.965289331972599, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.976302731782198, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9822550117969513, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9765516743063927, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9828138314187527, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900774918496609, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908045157790184, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990554129704833, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966863535810262, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974155875388533, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.24.mlp": [ - { - "accuracy": 0.9400363005697727, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9428493194282055, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9492817260324955, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.951792161911726, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9708214029669762, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9717514552175999, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9750669468194246, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9832387138158083, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846452893689275, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9851180734112859, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98628546949476, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924124917015433, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931620326824486, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953095130622387, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960450339131057, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967615341302007, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986945323180407, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.25.self_attn": [ - { - "accuracy": 0.9553123004734516, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9561666660010815, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.962991252541542, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9662357605993748, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9741081092506647, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9760416280478239, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9763949662446976, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9784641265869141, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860459482297301, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855982335284352, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880350967869163, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908542837947607, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98965326230973, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991416223347187, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993717601057142, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955263338051736, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941217796877027, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980696766870096, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984997563296929, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.25.mlp": [ - { - "accuracy": 0.9414506256580353, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9431392773985863, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9499778114259243, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9525086134672165, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9709824845194817, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9718742687255144, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754325021058321, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98378418199718, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9851860506460071, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9852406727150083, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9863474546000361, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925237689167261, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931926326826215, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956390825100243, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996124918339774, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968730995897204, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988279293756932, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.26.self_attn": [ - { - "accuracy": 0.9490628689527512, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9495618529617786, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.95834531635046, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9610902927815914, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9708157554268837, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9722082726657391, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9734915811568499, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9750107061117887, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9810928553342819, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817975275218487, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857687270268798, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889037329703569, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9861329607665539, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888241868466139, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926628698594868, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948058491572738, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930718587711453, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971061090473086, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979473259299994, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.26.mlp": [ - { - "accuracy": 0.9354912340641022, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9374528527259827, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9450247138738632, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9478339366614819, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.967911921441555, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9689346514642239, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9727319572120905, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818856455385685, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9835068881511688, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9836423229426146, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9848849065601826, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99169069994241, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924695435911417, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951112898997962, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956881711259484, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965273137204349, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986887727864087, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.27.self_attn": [ - { - "accuracy": 0.948082834482193, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9488292038440704, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9565153121948242, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9598666951060295, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9607742205262184, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9625872671604156, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9629640504717827, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9646806977689266, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9744205307215452, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.968675758689642, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9828311204910278, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9849596228450537, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844594039022923, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9863245459273458, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911126457154751, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922510460019112, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915196811780334, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944717460311949, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977023606188595, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.27.mlp": [ - { - "accuracy": 0.9338333159685135, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9356804862618446, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9429323747754097, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9456240125000477, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9668678008019924, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9680893644690514, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9716968536376953, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9808992575854063, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9824693761765957, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9827363956719637, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9841680098325014, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911122238263488, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920578729361296, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946132758632302, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952815175056458, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960157787427306, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981621213955805, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.28.self_attn": [ - { - "accuracy": 0.9464119076728821, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9472222551703453, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9524141326546669, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9541854299604893, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9713618904352188, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974262997508049, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9745627399533987, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9776313323527575, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844087259843946, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9843648690730333, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.986209200695157, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9893193868920207, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871587986126542, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9887126432731748, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928587442263961, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955113758333027, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932015892118216, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975364082492888, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981538228457794, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.28.mlp": [ - { - "accuracy": 0.8998142927885056, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9036422446370125, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9115893542766571, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9138708338141441, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.954499613493681, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9629295691847801, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9672028869390488, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754518177360296, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9785779193043709, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9789060018956661, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818937815725803, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9810802470892668, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886301169171929, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9827606193721294, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940126040019095, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947616397403181, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963894509710371, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.29.self_attn": [ - { - "accuracy": 0.9500799849629402, - "total_bits": 1998528, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9517067894339561, - "total_bits": 2023104, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9576395824551582, - "total_bits": 2118144, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9644648805260658, - "total_bits": 2477376, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9727642573416233, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9734974540770054, - "total_bits": 2947200, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.979119174182415, - "total_bits": 3644352, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9804393164813519, - "total_bits": 3647616, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9820635411888361, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9821135066449642, - "total_bits": 3726720, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9852963294833899, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874159069731832, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872207008302212, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892830261960626, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930798751302063, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942958098836243, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939773418009281, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963655602186918, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983942653052509, - "total_bits": 7183296, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.29.mlp": [ - { - "accuracy": 0.8869123533368111, - "total_bits": 6169152, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8911373913288116, - "total_bits": 6365760, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8954514041543007, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.8972769007086754, - "total_bits": 7836768, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9458927102386951, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.945903766900301, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9479703269898891, - "total_bits": 9756816, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9611603245139122, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9602623283863068, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9718689676374197, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.972653791308403, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9849440949037671, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865752784535289, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880935410037637, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932393822818995, - "total_bits": 17182752, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938597362488508, - "total_bits": 18657312, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967051034327596, - "total_bits": 21409824, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.norm.norm": null, - "lm_head.linear": null - }, - "strategy": { - "model.layers.0.self_attn": { - "accuracy": 0.9944152282550931, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.0.mlp": { - "accuracy": 0.9857291495427489, - "total_bits": 16219056, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "model.layers.1.self_attn": { - "accuracy": 0.9855817724019289, - "total_bits": 3831936, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.1.mlp": { - "accuracy": 0.9778902214020491, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.2.self_attn": { - "accuracy": 0.9876447143033147, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.2.mlp": { - "accuracy": 0.9897069204598665, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.3.self_attn": { - "accuracy": 0.9954097527079284, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - "model.layers.3.mlp": { - "accuracy": 0.9891038266941905, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.4.self_attn": { - "accuracy": 0.9951419718563557, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - "model.layers.4.mlp": { - "accuracy": 0.9882942270487547, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.5.self_attn": { - "accuracy": 0.9921023324131966, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.5.mlp": { - "accuracy": 0.9745967444032431, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "model.layers.6.self_attn": { - "accuracy": 0.9961583665572107, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - "model.layers.6.mlp": { - "accuracy": 0.9880103236064315, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.7.self_attn": { - "accuracy": 0.9904757384210825, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - "model.layers.7.mlp": { - "accuracy": 0.9878600733354688, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.8.self_attn": { - "accuracy": 0.9908908065408468, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.8.mlp": { - "accuracy": 0.9879121361300349, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.9.self_attn": { - "accuracy": 0.9931063265539706, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.9.mlp": { - "accuracy": 0.9877331163734198, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.10.self_attn": { - "accuracy": 0.9927606964483857, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.10.mlp": { - "accuracy": 0.9865744085982442, - "total_bits": 14357424, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.11.self_attn": { - "accuracy": 0.9963408834300935, - "total_bits": 5717376, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - "model.layers.11.mlp": { - "accuracy": 0.9859139826148748, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.12.self_attn": { - "accuracy": 0.9841982051730156, - "total_bits": 2956224, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.12.mlp": { - "accuracy": 0.9815687015652657, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "model.layers.13.self_attn": { - "accuracy": 0.992874032817781, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.13.mlp": { - "accuracy": 0.9819807410240173, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "model.layers.14.self_attn": { - "accuracy": 0.9867758862674236, - "total_bits": 3677040, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - "model.layers.14.mlp": { - "accuracy": 0.9892165157943964, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.15.self_attn": { - "accuracy": 0.9902772186324, - "total_bits": 3923328, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.15.mlp": { - "accuracy": 0.9878587136045098, - "total_bits": 9159600, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.16.self_attn": { - "accuracy": 0.9928999468684196, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.16.mlp": { - "accuracy": 0.9799803365021944, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "model.layers.17.self_attn": { - "accuracy": 0.9908454418182373, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.17.mlp": { - "accuracy": 0.9788082204759121, - "total_bits": 7007328, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "model.layers.18.self_attn": { - "accuracy": 0.9917034665122628, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.18.mlp": { - "accuracy": 0.984962971881032, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.19.self_attn": { - "accuracy": 0.9885950069874525, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.19.mlp": { - "accuracy": 0.9835127778351307, - "total_bits": 9097152, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.20.self_attn": { - "accuracy": 0.9949732795357704, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.20.mlp": { - "accuracy": 0.9870169991627336, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "model.layers.21.self_attn": { - "accuracy": 0.9899369664490223, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.21.mlp": { - "accuracy": 0.9894305132329464, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "model.layers.22.self_attn": { - "accuracy": 0.9895689664408565, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.22.mlp": { - "accuracy": 0.9853634415194392, - "total_bits": 11101728, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "model.layers.23.self_attn": { - "accuracy": 0.9891995070502162, - "total_bits": 4808064, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.23.mlp": { - "accuracy": 0.9855679515749216, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "model.layers.24.self_attn": { - "accuracy": 0.990554129704833, - "total_bits": 5413824, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - "model.layers.24.mlp": { - "accuracy": 0.9851180734112859, - "total_bits": 11758512, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.25.self_attn": { - "accuracy": 0.9908542837947607, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.25.mlp": { - "accuracy": 0.9851860506460071, - "total_bits": 11259840, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "model.layers.26.self_attn": { - "accuracy": 0.9889037329703569, - "total_bits": 3775344, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.26.mlp": { - "accuracy": 0.9848849065601826, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.27.self_attn": { - "accuracy": 0.9844594039022923, - "total_bits": 3866304, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.27.mlp": { - "accuracy": 0.9841680098325014, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.28.self_attn": { - "accuracy": 0.9928587442263961, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.28.mlp": { - "accuracy": 0.9818937815725803, - "total_bits": 11696064, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - "model.layers.29.self_attn": { - "accuracy": 0.9930798751302063, - "total_bits": 4811712, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "model.layers.29.mlp": { - "accuracy": 0.9865752784535289, - "total_bits": 14294976, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - } - }, - "q_last_module_idx": 62 + "progress": "begin", + "status_output": false } \ No newline at end of file