# Training config (checkpoint saved at global step 720, commit 0d4d417)
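# Data: a Numina training sample with a math eval set for validation;
# prompts are chat-templated and capped at 1024 tokens, responses at 2048.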
data:
  tokenizer: null
  train_files: numina_ds_train_sample.parquet
  val_files: matheval.parquet
  prompt_key: prompt
  max_prompt_length: 1024
  max_response_length: 2048
  train_batch_size: 32
  val_batch_size: 640
  return_raw_input_ids: false
  return_raw_chat: false
  shuffle: true
  apply_chat_template: true
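# Actor, reference policy, and rollout engine run as one hybrid engine,
# sharing the same GPUs between training and generation.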
actor_rollout_ref:
  hybrid_engine: true
  model:
    path: Qwen/Qwen2.5-Math-7B
    revision: main
    external_lib: null
    override_config: {}
    enable_gradient_checkpointing: true
    use_remove_padding: false
    save_hf_repo_id: RyanYr/brm-numina-qwen2.5math-7B-base-lr5e-7constant-n4
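  # Actor trains with the BRM loss plus a Q-learning replay buffer (capacity
  # 32, sampling/updating 32 trajectories per step); the reward threshold of
  # 1.0 presumably gates which rollouts are written back into the buffer.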
  actor:
    loss: brm
    brm:
      norm_factor: value
    qlearn:
      use_replaybuffer: true
      replaybuffer_size: 32
      replaybuffer_sample_size: 32
      replaybuffer_update_size: 32
      replaybuffer_update_reward_threshold: 1.0
    strategy: fsdp
    ppo_mini_batch_size: 64
    ppo_micro_batch_size: null
    ppo_micro_batch_size_per_gpu: 4
    use_dynamic_bsz: false
    ppo_max_token_len_per_gpu: 16384
    grad_clip: 1.0
    clip_ratio: 0.2
    entropy_coeff: 0.001
    use_kl_loss: false
    kl_loss_coef: 0.001
    kl_loss_type: low_var_kl
    ppo_epochs: 1
    shuffle: false
    ulysses_sequence_parallel_size: 1
    optim:
      lr: 5.0e-07
      lr_warmup_steps_ratio: 0
      min_lr_ratio: null
      warmup_style: constant
      total_training_steps: 12496
    fsdp_config:
      wrap_policy:
        min_num_params: 0
      param_offload: false
      optimizer_offload: false
      fsdp_size: -1
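  # Frozen reference policy; it only computes reference log-probs, which feed
  # the KL penalty configured under `algorithm` below.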
  ref:
    fsdp_config:
      param_offload: false
      wrap_policy:
        min_num_params: 0
    log_prob_micro_batch_size: null
    log_prob_micro_batch_size_per_gpu: 4
    log_prob_use_dynamic_bsz: false
    log_prob_max_token_len_per_gpu: 16384
    ulysses_sequence_parallel_size: 1
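  # Generation via vLLM in bfloat16: 4 samples per prompt ('n': 4) at
  # temperature 1.0, tensor-parallel over 4 GPUs, with vLLM capped at 60%
  # of GPU memory.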
  rollout:
    name: vllm
    temperature: 1.0
    top_k: -1
    top_p: 1
    prompt_length: 1024
    response_length: 2048
    dtype: bfloat16
    gpu_memory_utilization: 0.6
    ignore_eos: false
    enforce_eager: true
    free_cache_engine: true
    load_format: dummy_dtensor
    tensor_model_parallel_size: 4
    max_num_batched_tokens: 8192
    max_num_seqs: 1024
    log_prob_micro_batch_size: null
    log_prob_micro_batch_size_per_gpu: 4
    log_prob_use_dynamic_bsz: false
    log_prob_max_token_len_per_gpu: 16384
    disable_log_stats: true
    enable_chunked_prefill: true
    do_sample: true
    'n': 4
    seed: 42
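# Rule-based outcome reward via the `prime` reward manager; extra_rwfn_argdict
# passes scores to the reward function (presumably 1.0 for a correct final
# answer, 0 for format compliance alone).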
reward_model:
  reward_manager: prime
  extra_rwfn_argdict:
    format_score: 0
    score: 1.0
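# Fixed (non-adaptive) KL penalty against the reference policy, coefficient 0.1.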
algorithm:
  kl_penalty: kl
  kl_ctrl:
    type: fixed
    kl_coef: 0.1
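# Single node, 4 GPUs, one epoch over the data; checkpoint every 80 steps
# (removing the previous local checkpoint), validate every 20 steps and once
# before training; resume from the pinned Hugging Face revision below.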
trainer:
  total_epochs: 1
  total_training_steps: null
  project_name: value-LLM
  experiment_name: brm-numina-qwen2.5math-7B-base_lr5e-7constant-n4
  logger:
  - wandb
  val_generations_to_log_to_wandb: 0
  nnodes: 1
  n_gpus_per_node: 4
  save_freq: 80
  resume_mode: auto
  resume_from_path: false
  test_freq: 20
  critic_warmup: 0
  default_hdfs_dir: null
  remove_previous_ckpt_in_save: true
  del_local_ckpt_after_load: false
  default_local_dir: ./BRM
  hf_token: null
  resume_from_hf:
    enable: true
    hf_repo_id: RyanYr/brm-numina-qwen2.5math-7B-base-lr5e-7constant-n4
    hf_token: null
    revision: 1932c649200c975a4b3ae08f1ed04561da3736ff
  val_before_train: true