{ "batchers": null, "cachers": null, "compilers": null, "distillers": null, "pruners": null, "quantizers": "llm-int8", "recoverers": null, "quant_llm-int8_compute_dtype": "bfloat16", "quant_llm-int8_double_quant": false, "quant_llm-int8_enable_fp32_cpu_offload": false, "quant_llm-int8_has_fp16_weight": false, "quant_llm-int8_quant_type": "fp4", "quant_llm-int8_threshold": 6.0, "quant_llm-int8_weight_bits": 4, "max_batch_size": 1, "device": "cuda", "cache_dir": "/tmp/models/tmp60v43lzs", "task": "", "save_load_fn": "llm-int8", "save_load_fn_args": {}, "api_key": null }