# RyanYr's picture
# Save model at global step 100
# c77933d verified
# Dataset section: input parquet files, prompt column, length caps and batch
# sizes.
# NOTE(review): the reviewed copy had all indentation stripped, which made
# `data` null and turned every key below into a top-level key (with `shuffle`
# then clashing with later sections). Nesting is restored here.
data:
  tokenizer: null
  train_files: numina_ds_train_sample.parquet
  val_files: matheval.parquet
  prompt_key: prompt
  max_prompt_length: 1024
  max_response_length: 2048
  train_batch_size: 128
  val_batch_size: 640
  return_raw_input_ids: false
  return_raw_chat: false
  shuffle: true
  apply_chat_template: true
# Actor / reference / rollout section, all built on the same base model path.
# NOTE(review): the reviewed copy had all indentation stripped, so `model`,
# `actor`, `optim`, `fsdp_config`, `ref` and `rollout` parsed as null and
# their children collided as duplicate top-level keys. The nesting below is
# reconstructed from key order — confirm against the trainer's config schema.
actor_rollout_ref:
  hybrid_engine: true
  model:
    path: Qwen/Qwen2.5-Math-7B
    external_lib: null
    override_config: {}
    enable_gradient_checkpointing: true
    use_remove_padding: false
    # Placed under `model` because it follows the model keys and precedes
    # `actor` — TODO confirm the consumer reads it from here.
    save_hf_repo_id: RyanYr/numina-qwen2.5math-7Bbase-ppo_actor
  actor:
    strategy: fsdp
    ppo_mini_batch_size: 64
    ppo_micro_batch_size: null
    ppo_micro_batch_size_per_gpu: 2
    use_dynamic_bsz: false
    ppo_max_token_len_per_gpu: 16384
    grad_clip: 1.0
    clip_ratio: 0.2
    entropy_coeff: 0.001
    use_kl_loss: false
    kl_loss_coef: 0.001
    kl_loss_type: low_var_kl
    ppo_epochs: 1
    shuffle: false
    ulysses_sequence_parallel_size: 1
    optim:
      lr: 1.0e-06
      lr_warmup_steps_ratio: 0.0
      min_lr_ratio: null
      warmup_style: constant
      total_training_steps: 3124
    fsdp_config:
      wrap_policy:
        min_num_params: 0
      param_offload: false
      optimizer_offload: false
      fsdp_size: -1
  ref:
    fsdp_config:
      param_offload: false
      wrap_policy:
        min_num_params: 0
    log_prob_micro_batch_size: null
    log_prob_micro_batch_size_per_gpu: 2
    log_prob_use_dynamic_bsz: false
    log_prob_max_token_len_per_gpu: 16384
    ulysses_sequence_parallel_size: 1
  rollout:
    name: vllm
    temperature: 1.0
    top_k: -1
    top_p: 1
    # Same values as data.max_prompt_length / data.max_response_length.
    prompt_length: 1024
    response_length: 2048
    dtype: bfloat16
    gpu_memory_utilization: 0.2
    ignore_eos: false
    enforce_eager: true
    free_cache_engine: true
    load_format: dummy_dtensor
    # Equals trainer.n_gpus_per_node (4) in this file.
    tensor_model_parallel_size: 4
    max_num_batched_tokens: 8192
    max_num_seqs: 1024
    log_prob_micro_batch_size: null
    log_prob_micro_batch_size_per_gpu: 2
    log_prob_use_dynamic_bsz: false
    log_prob_max_token_len_per_gpu: 16384
    disable_log_stats: true
    enable_chunked_prefill: true
    do_sample: true
    # Key stays quoted: a bare `n` is read as a boolean by YAML 1.1 loaders.
    'n': 1
    # Placed under `rollout` because it directly follows the rollout keys —
    # TODO confirm it is not meant to sit one level up.
    seed: 42
# Critic (value model) section: optimizer, model and PPO batching settings.
# NOTE(review): the reviewed copy had all indentation stripped, so `optim`,
# `model`, `fsdp_config` and `wrap_policy` parsed as null and their children
# became duplicate top-level keys. The nesting below is reconstructed from
# key order — confirm against the trainer's config schema.
critic:
  strategy: fsdp
  optim:
    lr: 1.0e-05
    lr_warmup_steps_ratio: 0.0
    min_lr_ratio: null
    warmup_style: constant
    total_training_steps: 3124
  model:
    path: Qwen/Qwen2.5-Math-7B
    tokenizer_path: Qwen/Qwen2.5-Math-7B
    override_config: {}
    external_lib: null
    enable_gradient_checkpointing: true
    use_remove_padding: false
    fsdp_config:
      param_offload: false
      optimizer_offload: false
      wrap_policy:
        min_num_params: 0
      fsdp_size: -1
    # Placed under `model` (mirroring the actor's save_hf_repo_id position)
    # — TODO confirm the consumer reads it from here.
    save_hf_repo_id: RyanYr/numina-qwen2.5math-7Bbase-ppo_critic
  ppo_mini_batch_size: 64
  # NOTE(review): the critic sets the non-per-GPU micro batch sizes while the
  # actor sets the *_per_gpu variants; values are kept exactly as found.
  ppo_micro_batch_size: 4
  ppo_micro_batch_size_per_gpu: null
  forward_micro_batch_size: 4
  forward_micro_batch_size_per_gpu: null
  use_dynamic_bsz: false
  ppo_max_token_len_per_gpu: 32768
  forward_max_token_len_per_gpu: 32768
  ulysses_sequence_parallel_size: 1
  ppo_epochs: 1
  shuffle: false
  grad_clip: 1.0
  cliprange_value: 0.5
# Reward-model section. `enable` is false, so the model settings below are
# presumably unused in this run — verify against the trainer.
# NOTE(review): the reviewed copy had all indentation stripped, so `model`
# and `fsdp_config` parsed as null and their children became duplicate
# top-level keys. The nesting below is reconstructed from key order — confirm
# against the trainer's config schema.
reward_model:
  enable: false
  strategy: fsdp
  model:
    input_tokenizer: Qwen/Qwen2.5-Math-7B
    path: ~/models/FsfairX-LLaMA3-RM-v0.1
    external_lib: null
    use_remove_padding: false
    fsdp_config:
      min_num_params: 0
      param_offload: false
      fsdp_size: -1
  micro_batch_size: null
  micro_batch_size_per_gpu: null
  max_length: null
  ulysses_sequence_parallel_size: 1
  use_dynamic_bsz: false
  forward_max_token_len_per_gpu: 32768
  reward_manager: prime
# Advantage-estimation and KL-control settings.
# NOTE(review): the reviewed copy had all indentation stripped, so `kl_ctrl`
# parsed as null and `type` / `kl_coef` became top-level keys. Nesting is
# restored here.
algorithm:
  gamma: 1.0
  lam: 1.0
  adv_estimator: gae
  kl_penalty: kl
  kl_ctrl:
    type: fixed
    kl_coef: 0.001
# Run-level settings: schedule, logging, hardware layout, checkpointing and
# resume behaviour.
# NOTE(review): the reviewed copy had all indentation stripped, so `logger`
# lost its list item, `resume_from_hf` parsed as null, and `hf_token`
# appeared twice in the same mapping. The nesting below is reconstructed from
# key order — confirm against the trainer's config schema.
trainer:
  total_epochs: 1
  total_training_steps: null
  project_name: value-LLM
  experiment_name: ppo-numina-qwen2.5math-7Bbase
  logger:
    - wandb
  val_generations_to_log_to_wandb: 0
  nnodes: 1
  n_gpus_per_node: 4
  save_freq: 100
  resume_mode: auto
  resume_from_path: false
  test_freq: 20
  critic_warmup: 0
  default_hdfs_dir: null
  remove_previous_ckpt_in_save: true
  del_local_ckpt_after_load: false
  default_local_dir: ./PPO
  hf_token: null
  resume_from_hf:
    enable: true
    actor_hf_repo_id: RyanYr/numina-qwen2.5math-7Bbase-ppo_actor
    # Revision hashes are quoted so no loader can ever retype them.
    actor_revision: 'aff7a826e225ddc044daec6d1f93f57e38b4f15b'
    critic_hf_repo_id: RyanYr/numina-qwen2.5math-7Bbase-ppo_critic
    critic_revision: 'b98e588ce4c6c681ad584af7ef7506305647b205'
    # This second hf_token is nested here because trainer.hf_token already
    # exists above; two in one mapping would be a duplicate key.
    hf_token: null
  # Placed at trainer level (not inside resume_from_hf) — TODO confirm.
  val_before_train: true