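# torchtune config: PPO fine-tuning of Mistral-7B-Instruct-v0.2 with LoRA adapters,
# logged to the FedPPO / SelfReference W&B project (see wandb_logger below).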
prefix: /home/arqa39/.torchtune
output_dir: /home/arqa39/.torchtune/fed_ppo/mistral_7b

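# Tokenizer for the base Mistral-7B-Instruct-v0.2 checkpoint; no max sequence length is enforced.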
tokenizer:
  _component_: torchtune.models.mistral.mistral_tokenizer
  path: ${prefix}/models/Mistral-7B-Instruct-v0.2/tokenizer.model
  max_seq_len: null

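# Prompt dataset: plain text completion over the `prompt` column of the TRL-style sentiment dataset.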
dataset:
  _component_: torchtune.datasets.text_completion_dataset
  source: trl-internal-testing/sentiment-trl-style
  split: train
  column: prompt
  add_eos: false

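# Policy: Mistral-7B with rank-64 LoRA on all attention projections and the MLP layers.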
policy:
  _component_: torchtune.models.mistral.lora_mistral_7b
  lora_attn_modules:
    - q_proj
    - k_proj
    - v_proj
    - output_proj
  apply_lora_to_mlp: true
  apply_lora_to_output: false
  lora_rank: 64
  lora_alpha: 16
  lora_dropout: 0.0
  quantize_base: false

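# Value model: Mistral classifier head (num_classes: 1) built with its own rank-16 LoRA config.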
valmod:
  _component_: torchtune.models.mistral._component_builders.lora_mistral_classifier
  attn_dropout: 0.0
  embed_dim: 4096
  intermediate_dim: 14336
  max_seq_len: 32768
  norm_eps: 1.0e-05
  num_classes: 1
  num_heads: 32
  num_kv_heads: 8
  num_layers: 32
  vocab_size: 32001
  lora_attn_modules:
    - q_proj
    - k_proj
    - v_proj
    - output_proj
  apply_lora_to_mlp: true
  apply_lora_to_output: true
  lora_rank: 16
  lora_alpha: 32
  lora_dropout: 0.0
  quantize_base: false

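# Policy checkpointer: loads the HF-format base weights and writes checkpoints to ${output_dir}/policy.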
checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: ${prefix}/models/Mistral-7B-Instruct-v0.2/
  checkpoint_files:
    - pytorch_model-00001-of-00003.bin
    - pytorch_model-00002-of-00003.bin
    - pytorch_model-00003-of-00003.bin
  recipe_checkpoint: null
  output_dir: ${output_dir}/policy
  model_type: MISTRAL

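# Value-model checkpointer: the value head is initialised from the RM-Mistral-7B reward model.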
value_checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: ${prefix}/models/RM-Mistral-7B/
  checkpoint_files:
    - model-00001-of-00003.safetensors
    - model-00002-of-00003.safetensors
    - model-00003-of-00003.safetensors
  output_dir: ${output_dir}/value
  model_type: REWARD

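# Training loop: 10k PPO steps, 64 prompts per trajectory batch, optimised in PPO minibatches
# of 32 for 2 epochs each.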
seed: 53710
shuffle: true
device: cuda
batch_size: 64
num_steps: 10000
ppo_epochs: 2
ppo_batch_size: 32
gradient_accumulation_steps: 1
compile: false

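# Paged AdamW (bitsandbytes), with the optimiser step fused into the backward pass to save memory.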
optimizer:
  _component_: bitsandbytes.optim.PagedAdamW
  lr: 0.0001
optimizer_in_bwd: true

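# Memory and precision settings.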
log_peak_memory_stats: true
enable_activation_checkpointing: true
dtype: bf16

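# Generation and reward shaping: up to 58 new tokens per response at temperature 0.7; responses
# with no stop token (EOS id 2, and likely the "." token 28723) are penalised with a reward of -3.
# gamma / lmbda are the discount and GAE-lambda parameters for advantage estimation.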
forward_batch_size: 16
max_generated_tokens: 58
temperature: 0.7
top_k: null
min_response_length: 18
penalise_no_eos: true
reward_penalty: -3
stop_token_ids:
  - 2
  - 28723
whiten_rewards: false
gamma: 1
lmbda: 0.95

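# Clipped PPO objective with value-function clipping; kl_coeff scales the KL penalty against the reference policy.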
loss:
  _component_: torchtune.rlhf.loss.PPOLoss
  epsilon: 0.2
  value_coeff: 0.1
  value_clip_range: 0.2
kl_coeff: 0.01

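# Weights & Biases logging.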
wandb_logger:
  dir: ${prefix}
  entity: RADFAN
  project: FedPPO
  group: SelfReference
  name: Mistral-7B-LoRA-SelfRef-U13

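# Log every step; the reference policy is refreshed every 13 PPO steps.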
log_every_n_steps: 1
update_ref_policy_every_n_steps: 13