---
# Model-merge configuration: combines several Qwen2.5-14B derivatives onto the
# base model named below using the `dare_ties` merge method.
#
# NOTE(review): this file was reconstructed from a copy whose nesting
# indentation had been flattened and whose lines each carried a stray
# trailing " | |". Keys and values are unchanged; the nesting below is
# inferred from the key names and their order -- confirm against the
# original config before relying on it.

merge_method: dare_ties
base_model: djuna/Q2.5-Veltha-14B-0.5

# NOTE(review): both dtype keys are kept exactly as found. Verify that the
# merge tool actually reads `output_dtype` / `data_type`; mergekit's
# documented names appear to be `out_dtype` / `dtype` -- TODO confirm.
output_dtype: bfloat16
data_type: bfloat16

# Global merge parameters.
# NOTE(review): `epsilon` and `lambda` may not be consumed by `dare_ties`;
# confirm against the merge tool's per-method parameter list.
parameters:
  # Balanced between precision and flexibility.
  epsilon: 0.0085
  # Adjusted to emphasize impactful parameters without overfitting.
  lambda: 2.3
  # Ensures parameter normalization for stable integration.
  normalize: true
  # Dynamically rescales parameters for optimal alignment.
  rescale: true
  # Enables memory-efficient fine-tuning when applicable.
  # NOTE(review): nothing in this file configures fine-tuning; this flag
  # presumably only affects how masks are stored during the merge -- confirm.
  int8_mask: true

# Per-benchmark priorities.
# NOTE(review): presumably informational only; verify that the merge tool
# reads this section at all.
adaptive_merge_parameters:
  # Combined priority based on all configurations.
  task_weights:
    tinyArc: 1.7
    tinyHellaswag: 1.8
    tinyMMLU: 2.0
    tinyTruthfulQA: 2.8
    tinyTruthfulQA_mc1: 2.4
    tinyWinogrande: 2.1
    IFEval: 3.2
    BBH: 2.9
    MATH: 3.4
    GPQA: 2.6
    MUSR: 2.7
    MMLU-PRO: 2.5
  # Precise parameter blending.
  # NOTE(review): assumed to belong to `adaptive_merge_parameters` (sibling
  # of `task_weights`); the flattened source does not show its parent.
  smoothing_factor: 0.025

# Hybrid gradient clipping strategy (one value per model).
# NOTE(review): placed at top level by inference; it may instead belong
# under `adaptive_merge_parameters` -- confirm. Two entries here
# (CultriX/Qwen2.5-14B-Wernickev3, allknowingroger/QwenSlerp6-14B) do not
# appear in the `models` list below.
gradient_clipping:
  djuna/Q2.5-Veltha-14B-0.5: 0.89
  CultriX/Qwen2.5-14B-Brocav3: 0.88
  CultriX/Qwen2.5-14B-Hyperionv3: 0.87
  CultriX/Qwen2.5-14B-Wernickev3: 0.88
  hotmailuser/QwenSlerp2-14B: 0.90
  allknowingroger/QwenSlerp6-14B: 0.86
  sometimesanotion/Lamarck-14B-v0.6: 0.88
  qingy2024/Fusion4-14B-Instruct: 0.91
  CultriX/Qwen2.5-14B-Brocav7: 0.88
  CultriX/SeQwence-14B-EvolMerge: 0.87

# Models to merge, each with its own weight and density.
# The weights below sum to 1.17, not 1.0; `normalize: true` is set above,
# which presumably rescales them -- confirm.
models:
  - model: djuna/Q2.5-Veltha-14B-0.5
    parameters:
      weight: 0.28  # Backbone with strong reasoning capabilities.
      density: 0.78
  - model: CultriX/Qwen2.5-14B-Brocav3
    parameters:
      weight: 0.25  # High-performance reasoning and multitask contributions.
      density: 0.76
  - model: CultriX/Qwen2.5-14B-Hyperionv3
    parameters:
      weight: 0.18  # Balanced generalist for broad coverage.
      density: 0.75
  - model: hotmailuser/QwenSlerp2-14B
    parameters:
      weight: 0.13  # Specialist in instruction-following and QA.
      density: 0.72
  - model: sometimesanotion/Lamarck-14B-v0.6
    parameters:
      weight: 0.10  # Multi-step reasoning and task-specific expert.
      density: 0.65
  - model: qingy2024/Fusion4-14B-Instruct
    parameters:
      weight: 0.08  # Specialist in mathematical reasoning.
      density: 0.78
  - model: CultriX/Qwen2.5-14B-Brocav7
    parameters:
      weight: 0.08  # Focus on specific reasoning tasks.
      density: 0.77
  - model: CultriX/SeQwence-14B-EvolMerge
    parameters:
      weight: 0.07  # Generalist for multitask integration.
      density: 0.68