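# torchtune config: PPO fine-tuning of Mistral-7B-Instruct-v0.2 with LoRA adapters,
# logged to the FedPPO / SelfReference W&B project (see wandb_logger below).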
prefix: /home/arqa39/.torchtune
output_dir: /home/arqa39/.torchtune/fed_ppo/mistral_7b

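# Tokenizer for the base Mistral-7B-Instruct-v0.2 checkpoint; no max sequence length is enforced.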
tokenizer:
  _component_: torchtune.models.mistral.mistral_tokenizer
  path: ${prefix}/models/Mistral-7B-Instruct-v0.2/tokenizer.model
  max_seq_len: null

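# Prompt dataset: plain text completion over the `prompt` column of the TRL-style sentiment dataset.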
dataset:
  _component_: torchtune.datasets.text_completion_dataset
  source: trl-internal-testing/sentiment-trl-style
  split: train
  column: prompt
  add_eos: false

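# Policy: Mistral-7B with rank-64 LoRA on all attention projections and the MLP layers.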
policy:
  _component_: torchtune.models.mistral.lora_mistral_7b
  lora_attn_modules:
    - q_proj
    - k_proj
    - v_proj
    - output_proj
  apply_lora_to_mlp: true
  apply_lora_to_output: false
  lora_rank: 64
  lora_alpha: 16
  lora_dropout: 0.0
  quantize_base: false

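# Value model: Mistral classifier head (num_classes: 1) built with its own rank-16 LoRA config.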
valmod:
  _component_: torchtune.models.mistral._component_builders.lora_mistral_classifier
  attn_dropout: 0.0
  embed_dim: 4096
  intermediate_dim: 14336
  max_seq_len: 32768
  norm_eps: 1.0e-05
  num_classes: 1
  num_heads: 32
  num_kv_heads: 8
  num_layers: 32
  vocab_size: 32001
  lora_attn_modules:
    - q_proj
    - k_proj
    - v_proj
    - output_proj
  apply_lora_to_mlp: true
  apply_lora_to_output: true
  lora_rank: 16
  lora_alpha: 32
  lora_dropout: 0.0
  quantize_base: false

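# Policy checkpointer: loads the HF-format base weights and writes checkpoints to ${output_dir}/policy.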
checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: ${prefix}/models/Mistral-7B-Instruct-v0.2/
  checkpoint_files:
    - pytorch_model-00001-of-00003.bin
    - pytorch_model-00002-of-00003.bin
    - pytorch_model-00003-of-00003.bin
  recipe_checkpoint: null
  output_dir: ${output_dir}/policy
  model_type: MISTRAL

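# Value-model checkpointer: the value head is initialised from the RM-Mistral-7B reward model.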
value_checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: ${prefix}/models/RM-Mistral-7B/
  checkpoint_files:
    - model-00001-of-00003.safetensors
    - model-00002-of-00003.safetensors
    - model-00003-of-00003.safetensors
  output_dir: ${output_dir}/value
  model_type: REWARD

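# Training loop: 10k PPO steps, 64 prompts per trajectory batch, optimised in PPO minibatches
# of 32 for 2 epochs each.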
seed: 53710
shuffle: true
device: cuda
batch_size: 64
num_steps: 10000
ppo_epochs: 2
ppo_batch_size: 32
gradient_accumulation_steps: 1
compile: false

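# Paged AdamW (bitsandbytes), with the optimiser step fused into the backward pass to save memory.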
optimizer:
  _component_: bitsandbytes.optim.PagedAdamW
  lr: 0.0001
optimizer_in_bwd: true

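# Memory and precision settings.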
log_peak_memory_stats: true
enable_activation_checkpointing: true
dtype: bf16

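# Generation and reward shaping: up to 58 new tokens per response at temperature 0.7; responses
# with no stop token (EOS id 2, and likely the "." token 28723) are penalised with a reward of -3.
# gamma / lmbda are the discount and GAE-lambda parameters for advantage estimation.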
forward_batch_size: 16
max_generated_tokens: 58
temperature: 0.7
top_k: null
min_response_length: 18
penalise_no_eos: true
reward_penalty: -3
stop_token_ids:
  - 2
  - 28723
whiten_rewards: false
gamma: 1
lmbda: 0.95

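# Clipped PPO objective with value-function clipping; kl_coeff scales the KL penalty against the reference policy.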
loss:
  _component_: torchtune.rlhf.loss.PPOLoss
  epsilon: 0.2
  value_coeff: 0.1
  value_clip_range: 0.2
kl_coeff: 0.01

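# Weights & Biases logging.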
wandb_logger:
  dir: ${prefix}
  entity: RADFAN
  project: FedPPO
  group: SelfReference
  name: Mistral-7B-LoRA-SelfRef-U13

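# Log every step; the reference policy is refreshed every 13 PPO steps.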
log_every_n_steps: 1
update_ref_policy_every_n_steps: 13