{
  "attention_layers": [
    22,
    23,
    24,
    25
  ],
  "auto_mapping": null,
  "base_model_name_or_path": "meta-llama/Llama-3.2-3B",
  "cache_dtype": "float16",
  "cache_size": 262144,
  "cache_type": "FIFO",
  "compression_factor": 12,
  "context_size": 2,
  "global_cache": false,
  "inference_mode": true,
  "neighborhood_size": 2,
  "neurocache_type": "ONDEVICE",
  "pooling_window": 1,
  "retrieval_map": {
    "22": 22
  },
  "retrieval_stride": 4,
  "similarity_fn": "l2",
  "task_type": "CAUSAL_LM",
  "topk": 4
}
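
As a quick way to work with this file, here is a minimal Python sketch that loads and sanity-checks it with the standard `json` module. The filename `neurocache_config.json` and the specific consistency checks are assumptions for illustration, not part of a confirmed Neurocache API:

```python
import json

# Load the cache adapter config shown above. The filename
# "neurocache_config.json" is an assumption for this sketch.
with open("neurocache_config.json") as f:
    cfg = json.load(f)

# Basic consistency checks derived from the config's own fields.
assert cfg["task_type"] == "CAUSAL_LM"
assert cfg["cache_dtype"] in ("float16", "bfloat16", "float32")
assert cfg["topk"] >= 1 and cfg["retrieval_stride"] >= 1

# Every retrieval_map entry should refer to one of the
# cache-augmented layers listed in attention_layers.
# (JSON object keys are strings, hence the int() conversion.)
layers = set(cfg["attention_layers"])
for src, dst in cfg["retrieval_map"].items():
    assert int(src) in layers and dst in layers

print(f"{cfg['cache_type']} cache of {cfg['cache_size']:,} entries "
      f"({cfg['cache_dtype']}) on layers {cfg['attention_layers']} of "
      f"{cfg['base_model_name_or_path']}")
```

Reading the config with plain `json` keeps the check independent of whichever library version produced the file.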