# Mistral-7B-LoRA-MeanRef-0.8-U13-0-1 / torchtune_config.yaml
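# Base path for locally stored model checkpoints; all training outputs are written under output_dir.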
prefix: /home/arqa39/.torchtune
output_dir: /home/arqa39/.torchtune/fed_ppo/mistral_7b
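# Tokenizer: the SentencePiece tokenizer.model shipped with Mistral-7B-Instruct-v0.2; max_seq_len: null applies no length cap.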
tokenizer:
  _component_: torchtune.models.mistral.mistral_tokenizer
  path: ${prefix}/models/Mistral-7B-Instruct-v0.2/tokenizer.model
  max_seq_len: null
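# Prompt dataset: text-completion prompts from the "prompt" column of trl-internal-testing/sentiment-trl-style.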
dataset:
  _component_: torchtune.datasets.text_completion_dataset
  source: trl-internal-testing/sentiment-trl-style
  split: train
  column: prompt
  add_eos: false
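# Policy model: Mistral-7B with LoRA adapters (rank 64, alpha 16) on the attention projections and MLP; base weights unquantized.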
policy:
  _component_: torchtune.models.mistral.lora_mistral_7b
  lora_attn_modules:
    - q_proj
    - k_proj
    - v_proj
    - output_proj
  apply_lora_to_mlp: true
  apply_lora_to_output: false
  lora_rank: 64
  lora_alpha: 16
  lora_dropout: 0.0
  quantize_base: false
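# Value model ("valmod"): a LoRA Mistral classifier with a single output class, presumably the scalar value head for PPO, initialised from the reward model via value_checkpointer below.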
valmod:
  _component_: torchtune.models.mistral._component_builders.lora_mistral_classifier
  attn_dropout: 0.0
  embed_dim: 4096
  intermediate_dim: 14336
  max_seq_len: 32768
  norm_eps: 1.0e-05
  num_classes: 1
  num_heads: 32
  num_kv_heads: 8
  num_layers: 32
  vocab_size: 32001
  lora_attn_modules:
    - q_proj
    - k_proj
    - v_proj
    - output_proj
  apply_lora_to_mlp: true
  apply_lora_to_output: true
  lora_rank: 16
  lora_alpha: 32
  lora_dropout: 0.0
  quantize_base: false
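# Policy checkpointer: loads the HF-format Mistral-7B-Instruct-v0.2 weights and writes policy checkpoints to ${output_dir}/policy.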
checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: ${prefix}/models/Mistral-7B-Instruct-v0.2/
  checkpoint_files:
    - pytorch_model-00001-of-00003.bin
    - pytorch_model-00002-of-00003.bin
    - pytorch_model-00003-of-00003.bin
  recipe_checkpoint: null
  output_dir: ${output_dir}/policy
  model_type: MISTRAL
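# Value-model checkpointer: loads the HF-format RM-Mistral-7B reward-model weights and writes value checkpoints to ${output_dir}/value.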
value_checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: ${prefix}/models/RM-Mistral-7B/
  checkpoint_files:
    - model-00001-of-00003.safetensors
    - model-00002-of-00003.safetensors
    - model-00003-of-00003.safetensors
  output_dir: ${output_dir}/value
  model_type: REWARD
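# Run settings: 10000 PPO steps, rollout batch size 64, 2 PPO epochs per step over mini-batches of 32, no gradient accumulation or compile.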
seed: 53710
shuffle: true
device: cuda
batch_size: 64
num_steps: 10000
ppo_epochs: 2
ppo_batch_size: 32
gradient_accumulation_steps: 1
compile: false
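# Optimizer: bitsandbytes PagedAdamW with lr 1e-4.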
optimizer:
  _component_: bitsandbytes.optim.PagedAdamW
  lr: 0.0001
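# Memory/performance flags: step the optimizer during the backward pass, log peak GPU memory, enable activation checkpointing, and train in bf16.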
optimizer_in_bwd: true
log_peak_memory_stats: true
enable_activation_checkpointing: true
dtype: bf16
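# Generation during rollout: forward batches of 16, up to 58 new tokens per prompt, temperature 0.7, no top-k filtering.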
forward_batch_size: 16
max_generated_tokens: 58
temperature: 0.7
top_k: null
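# Reward shaping: responses that never emit a stop token (and, presumably, those shorter than 18 tokens) receive the -3 reward penalty; stop tokens are 2 (EOS) and 28723 (the Mistral "." token).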
min_response_length: 18
penalise_no_eos: true
reward_penalty: -3
stop_token_ids:
  - 2
  - 28723
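# Advantage estimation: GAE with gamma = 1 and lambda = 0.95; reward whitening disabled.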
whiten_rewards: false
gamma: 1
lmbda: 0.95
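# PPO objective: clipped surrogate loss (clip range 0.2) with value-loss coefficient 0.1 and value clipping; kl_coeff below sets the KL penalty against the reference policy.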
loss:
  _component_: torchtune.rlhf.loss.PPOLoss
  epsilon: 0.2
  value_coeff: 0.1
  value_clip_range: 0.2
kl_coeff: 0.01
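# Weights & Biases run metadata (entity / project / group / run name).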
wandb_logger:
  dir: ${prefix}
  entity: RADFAN
  project: FedPPO
  group: SelfReference
  name: Mistral-7B-LoRA-SelfRef-U13
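# Logging cadence and reference-policy refresh: log every step; the reference policy is updated every 13 steps (presumably the "U13" in the run name).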
log_every_n_steps: 1
update_ref_policy_every_n_steps: 13