metadata
# Model card metadata for ZYH-LLM-Qwen2.5-14B-V3.
# Nesting follows the Hugging Face `model-index` layout: each result groups
# its own task, dataset, metrics and source.
license: apache-2.0
language:
  - en
  - zh
base_model:
  - Qwen/Qwen2.5-14B
  - Azure99/Blossom-V6-14B
  - arcee-ai/Virtuoso-Small-v2
  - Qwen/Qwen2.5-14B-Instruct
  - Qwen/Qwen2.5-14B-Instruct-1M
pipeline_tag: text-generation
tags:
  - merge
model-index:
  - name: ZYH-LLM-Qwen2.5-14B-V3
    results:
      # IFEval, 0-shot
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: IFEval (0-Shot)
          type: HuggingFaceH4/ifeval
          args:
            num_few_shot: 0
        metrics:
          - type: inst_level_strict_acc and prompt_level_strict_acc
            value: 85.78
            name: strict accuracy
        source:
          url: >-
            https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3
          name: Open LLM Leaderboard
      # BBH, 3-shot
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: BBH (3-Shot)
          type: BBH
          args:
            num_few_shot: 3
        metrics:
          - type: acc_norm
            value: 48.18
            name: normalized accuracy
        source:
          url: >-
            https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3
          name: Open LLM Leaderboard
      # MATH Lvl 5, 4-shot
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: MATH Lvl 5 (4-Shot)
          type: hendrycks/competition_math
          args:
            num_few_shot: 4
        metrics:
          - type: exact_match
            value: 52.72
            name: exact match
        source:
          url: >-
            https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3
          name: Open LLM Leaderboard
      # GPQA, 0-shot
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: GPQA (0-shot)
          type: Idavidrein/gpqa
          args:
            num_few_shot: 0
        metrics:
          - type: acc_norm
            value: 10.96
            name: acc_norm
        source:
          url: >-
            https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3
          name: Open LLM Leaderboard
      # MuSR, 0-shot
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: MuSR (0-shot)
          type: TAUR-Lab/MuSR
          args:
            num_few_shot: 0
        metrics:
          - type: acc_norm
            value: 9
            name: acc_norm
        source:
          url: >-
            https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3
          name: Open LLM Leaderboard
      # MMLU-PRO, 5-shot
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: MMLU-PRO (5-shot)
          type: TIGER-Lab/MMLU-Pro
          config: main
          split: test
          args:
            num_few_shot: 5
        metrics:
          - type: acc
            value: 43.12
            name: accuracy
        source:
          url: >-
            https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3
          name: Open LLM Leaderboard
ZYH-LLM-Qwen2.5-14B-V3
This is the third-generation model of the ZYH-LLM series.
It makes extensive use of model merging techniques, aiming to provide a powerful, unified 14-billion-parameter model that lays a solid foundation for further model merging and fine-tuning.
As of February 25, 2025, this is the 14B model with the highest IFEval score.
Open LLM Leaderboard Evaluation Results
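Detailed results can be found on the Open LLM Leaderboard: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3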
Metric | Value |
---|---|
Avg. | 41.63 |
IFEval (0-Shot) | 85.78 |
BBH (3-Shot) | 48.18 |
MATH Lvl 5 (4-Shot) | 52.72 |
GPQA (0-shot) | 10.96 |
MuSR (0-shot) | 9.00 |
MMLU-PRO (5-shot) | 43.12 |
The specific details of the model merging are given below, in the hope that they inspire you:
First stage:
Step 1:
# della merge: model Qwen/Qwen2.5-14B-Instruct, base_model Qwen/Qwen2.5-14B.
# The result is named Qwen2.5-14B-YOYO-1010.
models:
  - model: Qwen/Qwen2.5-14B-Instruct
    # Per-model parameters
    parameters:
      density: 1
      weight: 1
      lambda: 0.9
merge_method: della
base_model: Qwen/Qwen2.5-14B
# Global parameters for this merge
parameters:
  density: 1
  weight: 1
  lambda: 0.9
  normalize: true
  int8_mask: true
dtype: bfloat16
tokenizer_source: base
name: Qwen2.5-14B-YOYO-1010
# della merge: model Qwen/Qwen2.5-14B-Instruct-1M, base_model Qwen/Qwen2.5-14B.
# The result is named Qwen2.5-14B-YOYO-1010-1M.
models:
  - model: Qwen/Qwen2.5-14B-Instruct-1M
    # Per-model parameters
    parameters:
      density: 1
      weight: 1
      lambda: 0.9
merge_method: della
base_model: Qwen/Qwen2.5-14B
# Global parameters for this merge
parameters:
  density: 1
  weight: 1
  lambda: 0.9
  normalize: true
  int8_mask: true
dtype: bfloat16
tokenizer_source: base
name: Qwen2.5-14B-YOYO-1010-1M
# della merge: model Qwen/Qwen2.5-14B-Instruct,
# base_model EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2.
# The result is named EVA-Qwen2.5-14B-YOYO-1010.
models:
  - model: Qwen/Qwen2.5-14B-Instruct
    # Per-model parameters
    parameters:
      density: 1
      weight: 1
      lambda: 0.9
merge_method: della
base_model: EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2
# Global parameters for this merge
parameters:
  density: 1
  weight: 1
  lambda: 0.9
  normalize: true
  int8_mask: true
dtype: bfloat16
tokenizer_source: base
name: EVA-Qwen2.5-14B-YOYO-1010
# della merge: model Qwen/Qwen2.5-14B-Instruct-1M,
# base_model EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2.
# The result is named EVA-Qwen2.5-14B-YOYO-1010-1M.
models:
  - model: Qwen/Qwen2.5-14B-Instruct-1M
    # Per-model parameters
    parameters:
      density: 1
      weight: 1
      lambda: 0.9
merge_method: della
base_model: EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2
# Global parameters for this merge
parameters:
  density: 1
  weight: 1
  lambda: 0.9
  normalize: true
  int8_mask: true
dtype: bfloat16
tokenizer_source: base
name: EVA-Qwen2.5-14B-YOYO-1010-1M
Step 2:
# della merge: model EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2,
# base_model Qwen/Qwen2.5-14B.
# The result is named EVA-Qwen2.5-14B-base.
models:
  - model: EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2
    # Per-model parameters
    parameters:
      density: 1
      weight: 1
      lambda: 0.9
merge_method: della
base_model: Qwen/Qwen2.5-14B
# Global parameters for this merge
parameters:
  density: 1
  weight: 1
  lambda: 0.9
  normalize: true
  int8_mask: true
dtype: bfloat16
tokenizer_source: base
name: EVA-Qwen2.5-14B-base
# sce merge: model EVA-Qwen2.5-14B-base,
# base_model Qwen/Qwen2.5-14B-Instruct-1M.
# The result is named Qwen2.5-14B-pro.
merge_method: sce
models:
  - model: EVA-Qwen2.5-14B-base
base_model: Qwen/Qwen2.5-14B-Instruct-1M
parameters:
  select_topk: 1
dtype: bfloat16
tokenizer_source: base
# NOTE(review): normalize/int8_mask come after dtype/tokenizer_source in the
# original, so they are kept as top-level keys. TODO confirm they were not
# meant to sit under `parameters:`.
normalize: true
int8_mask: true
name: Qwen2.5-14B-pro
Step 3:
# sce merge of the four *-YOYO-1010 models with base_model Qwen2.5-14B-pro.
# The result is named ZYH-LLM-Qwen2.5-14B-V3-preview.
models:
  - model: Qwen2.5-14B-YOYO-1010-1M
  - model: Qwen2.5-14B-YOYO-1010
  - model: EVA-Qwen2.5-14B-YOYO-1010-1M
  - model: EVA-Qwen2.5-14B-YOYO-1010
merge_method: sce
base_model: Qwen2.5-14B-pro
parameters:
  normalize: true
  int8_mask: true
dtype: bfloat16
tokenizer_source: base
name: ZYH-LLM-Qwen2.5-14B-V3-preview
Second stage:
# della merge: model Qwen/Qwen2.5-14B-Instruct,
# base_model arcee-ai/Virtuoso-Small-v2.
# The result is named Qwen2.5-14B-YOYO-della1.
models:
  - model: Qwen/Qwen2.5-14B-Instruct
    # Per-model parameters
    parameters:
      density: 1
      weight: 1
      lambda: 0.9
merge_method: della
base_model: arcee-ai/Virtuoso-Small-v2
# Global parameters for this merge
parameters:
  density: 1
  weight: 1
  lambda: 0.9
  normalize: true
  int8_mask: true
dtype: bfloat16
tokenizer_source: base
name: Qwen2.5-14B-YOYO-della1
# della merge: model Qwen/Qwen2.5-14B-Instruct-1M,
# base_model arcee-ai/Virtuoso-Small-v2.
# The result is named Qwen2.5-14B-YOYO-della2.
models:
  - model: Qwen/Qwen2.5-14B-Instruct-1M
    # Per-model parameters
    parameters:
      density: 1
      weight: 1
      lambda: 0.9
merge_method: della
base_model: arcee-ai/Virtuoso-Small-v2
# Global parameters for this merge
parameters:
  density: 1
  weight: 1
  lambda: 0.9
  normalize: true
  int8_mask: true
dtype: bfloat16
tokenizer_source: base
name: Qwen2.5-14B-YOYO-della2
# della merge: model Qwen/Qwen2.5-14B-Instruct,
# base_model Azure99/Blossom-V6-14B.
# The result is named Qwen2.5-14B-YOYO-della3.
models:
  - model: Qwen/Qwen2.5-14B-Instruct
    # Per-model parameters
    parameters:
      density: 1
      weight: 1
      lambda: 0.9
merge_method: della
base_model: Azure99/Blossom-V6-14B
# Global parameters for this merge
parameters:
  density: 1
  weight: 1
  lambda: 0.9
  normalize: true
  int8_mask: true
dtype: bfloat16
tokenizer_source: base
name: Qwen2.5-14B-YOYO-della3
# della merge: model Qwen/Qwen2.5-14B-Instruct-1M,
# base_model Azure99/Blossom-V6-14B.
# The result is named Qwen2.5-14B-YOYO-della4.
models:
  - model: Qwen/Qwen2.5-14B-Instruct-1M
    # Per-model parameters
    parameters:
      density: 1
      weight: 1
      lambda: 0.9
merge_method: della
base_model: Azure99/Blossom-V6-14B
# Global parameters for this merge
parameters:
  density: 1
  weight: 1
  lambda: 0.9
  normalize: true
  int8_mask: true
dtype: bfloat16
tokenizer_source: base
name: Qwen2.5-14B-YOYO-della4
Final stage:
# model_stock merge of the four della1..della4 models with
# base_model ZYH-LLM-Qwen2.5-14B-V3-preview.
# The result is named ZYH-LLM-Qwen2.5-14B-V3.
merge_method: model_stock
base_model: ZYH-LLM-Qwen2.5-14B-V3-preview
models:
  - model: Qwen2.5-14B-YOYO-della1
  - model: Qwen2.5-14B-YOYO-della2
  - model: Qwen2.5-14B-YOYO-della3
  - model: Qwen2.5-14B-YOYO-della4
dtype: bfloat16
tokenizer_source: base
# NOTE(review): int8_mask/normalize come after dtype/tokenizer_source in the
# original and no `parameters:` key is present, so they are kept as top-level
# keys. TODO confirm this placement is intended.
int8_mask: true
normalize: true
name: ZYH-LLM-Qwen2.5-14B-V3