---
# Pretraining run configuration for the micro-llama-300M-v2 model.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not a loadable YAML mapping (`: ` may not appear inside a plain scalar).
# The nesting below was reconstructed from the key order; in particular
# `seed` / `num_workers` were placed under `data.init_args` because a second
# top-level `seed` already exists — confirm against the consuming tool.

model_name: micro-llama-300M-v2
out_dir: out/pretrain/micro-llama-v2
precision: bf16-mixed

# NOTE(review): absolute, machine-specific path; `resume` is false, so this
# checkpoint is presumably used only as the starting weights — confirm.
initial_checkpoint_dir: /root/litgpt/out_lightning_ai/step-00128000-converted
resume: false

# Data module, given as a class path plus its constructor arguments.
data:
  class_path: litgpt.data.MicroLlama
  init_args:
    data_path: s3://microllama-v2
    seed: 42
    num_workers: 8

# Training loop settings.
train:
  save_interval: 1000
  log_interval: 10
  # NOTE(review): 32 / 4 = 8; how this ratio is split across devices and
  # accumulation steps depends on the trainer — verify.
  global_batch_size: 32
  micro_batch_size: 4
  lr_warmup_steps: 2000
  # 3 * 10^12 tokens.
  max_tokens: 3000000000000
  max_seq_length: 2048
  max_norm: 1.0
  # Equals 10% of optimizer.init_args.lr (0.0004).
  min_lr: 4.0e-05

# Evaluation settings.
eval:
  interval: 1000
  max_iters: 100
  initial_validation: false
  final_validation: true
  evaluate_example: first

# Optimizer, given as a class path plus its constructor arguments.
optimizer:
  class_path: torch.optim.AdamW
  init_args:
    lr: 0.0004
    weight_decay: 0.1
    betas:
      - 0.9
      - 0.95

devices: auto
num_nodes: 1
tokenizer_dir: checkpoints/meta-llama/Llama-3.2-1B
logger_name: wandb

# Top-level seed; the data module carries its own `seed` under
# data.init_args.
seed: 42