|
{ |
|
"os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39", |
|
"python": "3.10.0", |
|
"heartbeatAt": "2025-01-17T19:52:32.532343", |
|
"startedAt": "2025-01-17T19:52:32.000044", |
|
"docker": null, |
|
"cuda": null, |
|
"args": [ |
|
"--model_family", |
|
"llama", |
|
"--apply_instruct_masks", |
|
"--token_scaled_loss", |
|
"--seq_parallel_size", |
|
"8", |
|
"--report_to", |
|
"wandb", |
|
"--do_train", |
|
"--model_name_or_path", |
|
"/datasets/ai/llama3/meta-llama/models--meta-llama--Meta-Llama-3.1-8B-Instruct/snapshots/5206a32e0bd3067aef1ce90f5528ade7d866253f/", |
|
"--config_name", |
|
"/datasets/ai/llama3/meta-llama/models--meta-llama--Meta-Llama-3.1-8B-Instruct/snapshots/5206a32e0bd3067aef1ce90f5528ade7d866253f/", |
|
"--tokenizer_name", |
|
"/datasets/ai/llama3/meta-llama/models--meta-llama--Meta-Llama-3.1-8B-Instruct/snapshots/5206a32e0bd3067aef1ce90f5528ade7d866253f/", |
|
"--run_name", |
|
"_llama-3.1-8b-instruct_bsz-16_lr-1e-6_epochs-1_", |
|
"--output_dir", |
|
"/scratch3/workspace/ctpham_umass_edu-ft/_llama-3.1-8b-instruct_bsz-16_lr-1e-6_epochs-1_", |
|
"--config_overrides_json", |
|
"", |
|
"--gradient_accumulation_steps", |
|
"2", |
|
"--per_device_train_batch_size", |
|
"1", |
|
"--bf16", |
|
"--learning_rate", |
|
"1e-6", |
|
"--min_lr_ratio", |
|
"0.1", |
|
"--lr_scheduler_type", |
|
"cosine", |
|
"--max_grad_norm", |
|
"1.0", |
|
"--adam_beta1", |
|
"0.9", |
|
"--adam_beta2", |
|
"0.95", |
|
"--weight_decay", |
|
"0.1", |
|
"--warmup_ratio", |
|
"0.05", |
|
"--optim", |
|
"adamw_torch", |
|
"--logging_steps", |
|
"1", |
|
"--log_level", |
|
"info", |
|
"--save_steps", |
|
"200", |
|
"--dataloader_num_workers", |
|
"1", |
|
"--disable_tqdm", |
|
"true", |
|
"--use_fast_tokenizer", |
|
"false", |
|
"--remove_unused_columns", |
|
"false", |
|
"--ddp_find_unused_parameters", |
|
"false", |
|
"--fsdp", |
|
"auto_wrap offload", |
|
"--gradient_checkpointing", |
|
"--tokenized_mds_train", |
|
"/work/pi_miyyer_umass_edu/ctpham/BookClaim-dev/data/ft/bookclaim_balanced_pack_complete", |
|
"--cuda_empty_cache", |
|
"--num_train_epochs", |
|
"1" |
|
], |
|
"state": "running", |
|
"program": "/work/pi_miyyer_umass_edu/ctpham/BookClaim-dev/prolong-final/finetune.py", |
|
"codePathLocal": "finetune.py", |
|
"codePath": "prolong-final/finetune.py", |
|
"git": { |
|
"remote": "https://github.com/chtmp223/BookGen-dev.git", |
|
"commit": "e60c366cb7f40b08835d1f51dd65e296355b4d2a" |
|
}, |
|
"email": "[email protected]", |
|
"root": "/work/pi_miyyer_umass_edu/ctpham/BookClaim-dev", |
|
"host": "gpu017", |
|
"username": "ctpham_umass_edu", |
|
"executable": "/scratch3/workspace/ctpham_umass_edu-ft/envs/prolong-final/bin/python3.10", |
|
"cpu_count": 112, |
|
"cpu_count_logical": 112, |
|
"cpu_freq": { |
|
"current": 945.6287678571429, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
"cpu_freq_per_core": [ |
|
{ |
|
"current": 771.263, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 1639.301, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 784.967, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 785.053, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 1800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 2400.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 1100.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 2400.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 2300.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 1817.34, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 784.299, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 1200.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 2002.088, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 794.132, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 1400.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 3800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 785.042, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 785.705, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 784.932, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 788.581, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 811.886, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 900.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 2400.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
}, |
|
{ |
|
"current": 800.0, |
|
"min": 800.0, |
|
"max": 3800.0 |
|
} |
|
], |
|
"disk": { |
|
"/": { |
|
"total": 438.487850189209, |
|
"used": 16.720291137695312 |
|
} |
|
}, |
|
"gpu": "NVIDIA A100-SXM4-80GB", |
|
"gpu_count": 8, |
|
"gpu_devices": [ |
|
{ |
|
"name": "NVIDIA A100-SXM4-80GB", |
|
"memory_total": 85899345920 |
|
}, |
|
{ |
|
"name": "NVIDIA A100-SXM4-80GB", |
|
"memory_total": 85899345920 |
|
}, |
|
{ |
|
"name": "NVIDIA A100-SXM4-80GB", |
|
"memory_total": 85899345920 |
|
}, |
|
{ |
|
"name": "NVIDIA A100-SXM4-80GB", |
|
"memory_total": 85899345920 |
|
}, |
|
{ |
|
"name": "NVIDIA A100-SXM4-80GB", |
|
"memory_total": 85899345920 |
|
}, |
|
{ |
|
"name": "NVIDIA A100-SXM4-80GB", |
|
"memory_total": 85899345920 |
|
}, |
|
{ |
|
"name": "NVIDIA A100-SXM4-80GB", |
|
"memory_total": 85899345920 |
|
}, |
|
{ |
|
"name": "NVIDIA A100-SXM4-80GB", |
|
"memory_total": 85899345920 |
|
} |
|
], |
|
"memory": { |
|
"total": 2015.3286399841309 |
|
} |
|
} |
|
|