toolkit / training_scripts /t35-template
k4d3's picture
you fucked up, claude
c7dbbcf
raw
history blame
3.92 kB
#!/usr/bin/env zsh
# Stop at the first failing command; a pipeline fails if any stage fails.
set -eo pipefail

# Name of this training run. It is handed to setup_training_vars further down
# and reused for the output directory, output file name and log prefix.
NAME="test-deleteme-v0s100"

# Training entry point and the repository checkout that contains it.
# Both keep a value already present in the environment and fall back to the
# defaults below only when unset or empty.
: "${SD_SCRIPT:=sd3_train_network.py}"
: "${SD_REPO:=$HOME/source/repos/sd-scripts-sd3}"
# Root of the ComfyUI model tree that the checkpoint and text-encoder paths
# below are resolved against. Like SD_SCRIPT / SD_REPO it can be overridden
# from the environment; the default reproduces the original hard-coded paths.
COMFYUI_MODELS_DIR="${COMFYUI_MODELS_DIR:-/home/kade/ComfyUI/models}"

# Base argument list for the training script. Every uncommented entry becomes
# one command-line word, in this order. Commented-out entries are disabled
# options kept for reference.
args=(
    # ⚠️ TODO: Benchmark...
    #--debiased_estimation_loss
    # ⚠️ TODO: What does this do? Does it even work?
    #--max_token_length=225
    --clip_g_dropout_rate=0.0
    --t5_dropout_rate=0.0
    --enable_scaled_pos_embed
    # Keep Tokens
    --keep_tokens=1
    --keep_tokens_separator="|||"
    # Model
    --pretrained_model_name_or_path="$COMFYUI_MODELS_DIR/checkpoints/sd3.5_large.safetensors"
    --clip_l="$COMFYUI_MODELS_DIR/clip/clip_l.safetensors"
    --clip_g="$COMFYUI_MODELS_DIR/clip/clip_g.safetensors"
    --t5xxl="$COMFYUI_MODELS_DIR/clip/t5xxl_fp16.safetensors"
    # Output, logging
    --log_with=tensorboard
    --seed=1728871242
    --fp8_base
    # Dataset
    --dataset_repeats=1
    --resolution="1024,1024"
    --enable_bucket
    --bucket_reso_steps=64
    --min_bucket_reso=128
    --max_bucket_reso=2048
    --flip_aug
    --shuffle_caption
    --cache_latents
    --cache_latents_to_disk
    --max_data_loader_n_workers=8
    --persistent_data_loader_workers
    # Network config
    --network_dim=64
    # ⚠️ TODO: Plot
    --network_alpha=0.0625
    --network_module="lycoris.kohya"
    # --network_args takes the following key=value words as its values.
    --network_args
        "preset=full"
        "decompose_both=False"
        "rank_dropout=0"
        "module_dropout=0"
        "use_tucker=True"
        "use_scalar=False"
        "rank_dropout_scale=False"
        "algo=lokr"
        "bypass_mode=False"
        "factor=16"
        "dora_wd=True"
        "train_norm=False"
    --network_dropout=0
    # Optimizer config
    --optimizer_type=ClybW
    --train_batch_size=14
    #--gradient_accumulation_steps=1
    --max_grad_norm=1
    --gradient_checkpointing
    #--scale_weight_norms=1
    # LR Scheduling
    #--lr_warmup_steps=100
    # NOTE: 0.0004 if it's anything like FLUX...
    --learning_rate=0.0005
    --unet_lr=0.0002
    --text_encoder_lr=0.0001
    --lr_scheduler="cosine"
    --lr_scheduler_args="num_cycles=0.375"
    # Noise
    --multires_noise_iterations=12
    --multires_noise_discount=0.4
    #--min_snr_gamma=1
    # Optimization, details
    --no_half_vae
    --sdpa
    --mixed_precision="bf16"
    # Saving
    --save_model_as="safetensors"
    --save_precision="fp16"
    --save_every_n_steps=100
    # Saving States
    #--save_state
    # Either resume from a saved state
    #--resume="$OUTPUT_DIR/wolflink-vfucks400" # Resume from saved state
    #--skip_until_initial_step
    # Or from a checkpoint
    #--network_weights="$OUTPUT_DIR/wolflink-vfucks400/wolflink-vfucks400-step00000120.safetensors" # Resume from checkpoint (not needed with state, i think)
    #--initial_step=120
    # Sampling
    --sample_every_n_steps=25
    --sample_sampler="euler_a"
    --sample_at_first
    --caption_extension=".txt"
)
# ===== Environment Setup =====
# Load the shared helper library. The helper functions called below are not
# defined in this script and are expected to come from this file.
source "$HOME/toolkit/zsh/train_functions.zsh"

# Setup variables and training arguments
setup_training_vars "$NAME"

# OUTPUT_DIR, TRAINING_DIR and STEPS are used below but never assigned in this
# script (presumably setup_training_vars provides them -- TODO confirm).
# Fail early with a clear message if any is unset or empty; otherwise the
# trainer would receive paths such as "/$NAME" or an empty step count.
: "${OUTPUT_DIR:?OUTPUT_DIR is unset or empty after setup_training_vars}"
: "${TRAINING_DIR:?TRAINING_DIR is unset or empty after setup_training_vars}"
: "${STEPS:?STEPS is unset or empty after setup_training_vars}"

args+=( # Add the output and dataset arguments
    --output_dir="$OUTPUT_DIR/$NAME"
    --output_name="$NAME"
    --log_prefix="$NAME-"
    --logging_dir="$OUTPUT_DIR/logs"
    --max_train_steps="$STEPS"
    --dataset_config="$TRAINING_DIR/config.toml"
    #--train_data_dir="$TRAINING_DIR"
    --sample_prompts="$TRAINING_DIR/sample-prompts.txt"
    # Script arguments: anything passed to this script is appended last.
    "$@"
)

setup_conda_env "sdscripts"
LYCORIS_REPO=$(get_lycoris_repo)

# Set cleanup trap for both error and normal exit
trap cleanup_empty_output EXIT TERM

# Copies the script itself and the repositories' commit hashes to the output directory
store_commits_hashes "$SD_REPO" "$LYCORIS_REPO"

# ===== Run Training Script =====
run_training_script "$SD_REPO/$SD_SCRIPT" "${args[@]}"