# Qwen2.5-1.5B-THREADRIPPER-v0.1 / mergekit_config.yml
# Xiaojian9992024's picture
# Upload folder using huggingface_hub
# 21ad269 verified
# raw
# history blame contribute delete
# 945 Bytes
# mergekit configuration: merges the five models listed under `models`
# onto `base_model` using the `della_linear` merge method.
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16

# Global parameters applied to the merge method as a whole.
parameters:
  epsilon: 0.02
  lambda: 1.65
  normalize: true

# NOTE(review): `adaptive_merge_parameters` does not appear to be a key that
# mergekit itself documents; presumably it is ignored by the tool and kept
# only as an annotation -- confirm before relying on these weights.
# NOTE(review): the original indentation was lost; `smoothing_factor` is
# assumed to be a sibling of `task_weights` -- confirm against the upstream
# file.
adaptive_merge_parameters:
  task_weights:
    tinyArc: 2.2
    tinyHellaswag: 2.15
    tinyMMLU: 1.9
    tinyTruthfulQA: 2.0
    tinyTruthfulQA_mc1: 1.95
    tinyWinogrande: 1.85
    IFEval: 2.5  # Best stat, for instruction-following
    BBH: 2.2
    MATH: 2.3
    GPQA: 2.0
    MUSR: 2.0
    MMLU-PRO: 2.15
  smoothing_factor: 0.15

# Input models. Every entry carries its own `parameters` mapping; all of
# them use the same weight (1) and density (1).
models:
  - model: Qwen/Qwen2.5-Coder-1.5B-Instruct
    parameters:
      weight: 1
      density: 1
  - model: Qwen/Qwen2.5-Math-1.5B-Instruct
    parameters:
      weight: 1
      density: 1
  # NOTE(review): this entry names the same repository as `base_model`;
  # presumably intentional -- confirm it is meant to be merged with itself.
  - model: Qwen/Qwen2.5-1.5B-Instruct
    parameters:
      weight: 1
      density: 1
  - model: justinj92/Qwen2.5-1.5B-Thinking
    parameters:
      weight: 1
      density: 1
  - model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
    parameters:
      weight: 1
      density: 1