smohammadi
/

torchtune_test

English

gemma

torchtune

Model card Files Files and versions Community

smohammadi committed on Oct 6, 2024

Commit

ab2eea2

verified ·

1 Parent(s): 8d96ff1

Upload config.yaml with huggingface_hub

Browse files

Files changed (1) hide show

config.yaml +66 -0

config.yaml ADDED Viewed

	@@ -0,0 +1,66 @@

+tokenizer:  # tokenizer component and the local model file it is given
+  _component_: torchtune.models.gemma.gemma_tokenizer
+  path: ./target/gemma-2b/tokenizer.model  # relative path; same gemma-2b directory as checkpointer.checkpoint_dir
+dataset:
+  _component_: torchtune.datasets.alpaca_dataset  # no extra arguments are passed to the dataset component here
+seed: null  # no fixed seed is set in this config
+shuffle: true
+model:  # model builder plus its LoRA arguments (component name: lora_gemma_2b)
+  _component_: torchtune.models.gemma.lora_gemma_2b
+  lora_attn_modules:  # only q/k/v projections are listed; no output projection entry
+  - q_proj
+  - k_proj
+  - v_proj
+  apply_lora_to_mlp: true
+  lora_rank: 64
+  lora_alpha: 128  # twice lora_rank (128 / 64 = 2)
+  lora_dropout: 0.0
+checkpointer:  # where base weights are read from and where outputs are written
+  _component_: torchtune.training.FullModelHFCheckpointer
+  checkpoint_dir: ./target/gemma-2b/
+  checkpoint_files:  # two safetensors shards (00001 and 00002 of 00002)
+  - model-00001-of-00002.safetensors
+  - model-00002-of-00002.safetensors
+  recipe_checkpoint: null
+  output_dir: ${output_dir}/weights  # presumably interpolates the top-level output_dir (./target/gemma_tmp) — confirm
+  model_type: GEMMA
+resume_from_checkpoint: false
+save_adapter_weights_only: false
+optimizer:
+  _component_: torch.optim.AdamW
+  fused: false
+  lr: 2.0e-05
+lr_scheduler:
+  _component_: torchtune.modules.get_cosine_schedule_with_warmup
+  num_warmup_steps: 10  # NOTE(review): equals max_steps_per_epoch (10) with epochs: 1 — warmup may span the whole run; confirm intent
+loss:
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+batch_size: 4
+epochs: 1
+max_steps_per_epoch: 10  # NOTE(review): short cap, presumably for a smoke test — confirm
+gradient_accumulation_steps: 4  # with batch_size 4, presumably 16 samples per optimizer step — confirm
+compile: false
+device: mps
+enable_activation_checkpointing: true
+enable_activation_offloading: false
+dtype: bf16  # NOTE(review): verify bf16 is supported on the mps device in the target environment
+metric_logger:
+  _component_: torchtune.training.metric_logging.DiskLogger
+  log_dir: ${output_dir}  # presumably interpolates the top-level output_dir (./target/gemma_tmp) — confirm
+output_dir: ./target/gemma_tmp  # referenced via interpolation by checkpointer, metric_logger and profiler
+log_every_n_steps: 1
+log_peak_memory_stats: false
+profiler:
+  _component_: torchtune.training.setup_torch_profiler
+  enabled: false  # profiler is switched off; the settings below presumably have no effect while disabled — confirm
+  output_dir: ${output_dir}/profiling_outputs
+  cpu: true
+  cuda: true  # NOTE(review): device is mps in this config; revisit if the profiler is ever enabled
+  profile_memory: false
+  with_stack: false
+  record_shapes: true
+  with_flops: false
+  wait_steps: 5
+  warmup_steps: 5
+  active_steps: 2  # NOTE(review): wait + warmup + active = 12, more than max_steps_per_epoch (10) — confirm if enabling
+  num_cycles: 1