---
# mergekit configuration: SLERP merge of two MISCHIEVOUS-12B variants over
# layers 0-40 of each model, with a different interpolation schedule per
# tensor group.

slices:
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.1v
        layer_range: [0, 40]
      - model: bamec66557/MISCHIEVOUS-12B
        layer_range: [0, 40]

# Interpolation factor `t`. Entries with a `filter` apply only to tensors
# whose name matches it; the last, unfiltered entry is the fallback.
# A list value is a gradient spread across the layer range.
parameters:
  t:
    # Attention weights: ramp steeply upwards across the layers.
    - filter: self_attn
      value: [0.1, 0.3, 0.7, 0.9, 1.0]
    # MLP weights: the opposite schedule, falling steeply to 0.
    - filter: mlp
      value: [1.0, 0.7, 0.4, 0.1, 0.0]
    # Normalisation weights: 0.7 for the first 10 layers, 0.3 for the
    # remaining 30 (40 values in total).
    # NOTE(review): norm tensors in Mistral-style checkpoints are usually
    # named `input_layernorm` / `post_attention_layernorm`; if filters are
    # matched as substrings, `layer_norm` may match nothing and these
    # tensors would fall through to the default below - verify.
    - filter: layer_norm
      value: [
        0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7,
        0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
        0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
        0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3
      ]
    # Default ratio for every tensor not matched above (deliberately high).
    - value: 0.9

# Spherical linear interpolation between the two models.
merge_method: slerp

# Base model of the merge.
base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.1v

# Data type used for the merge (chosen for speed).
dtype: bfloat16

# Additional options.
# NOTE(review): `regularization` and `postprocessing` are not among the
# options documented for mergekit; they are presumably ignored by the tool.
# Confirm before relying on them.
regularization:
  # Intended to stabilise the merged weights with L2 normalisation; the
  # small scale keeps the effect weak so that variation is preserved.
  - method: l2_norm
    scale: 0.005

postprocessing:
  # Intended to smooth the merged weights; the larger kernel smooths over
  # a wider range.
  - operation: smoothing
    kernel_size: 5
  # Intended to normalise the weights after the merge.
  - operation: normalize