#!/usr/bin/env zsh
#
# Launches a LyCORIS (algo=lokr) network training run on top of the SD3.5 Large
# checkpoint: builds the trainer command line in the `args` array and hands it
# to the helper functions sourced from train_functions.zsh.
#
# Usage:
#   <this-script> [extra trainer arguments...]
# Any extra arguments are appended after every argument defined in this file.
#
# Environment overrides:
#   SD_SCRIPT           trainer entry point (default: sd3_train_network.py)
#   SD_REPO             trainer checkout (default: $HOME/source/repos/sd-scripts-sd3)
#   COMFYUI_MODELS_DIR  directory holding checkpoints/ and clip/
#                       (default: /home/kade/ComfyUI/models)
set -e -o pipefail

NAME=test-deleteme-v0s100
SD_SCRIPT="${SD_SCRIPT:-sd3_train_network.py}"
SD_REPO="${SD_REPO:-$HOME/source/repos/sd-scripts-sd3}"
# Root of the model files; overridable in the same way as SD_REPO/SD_SCRIPT.
COMFYUI_MODELS_DIR="${COMFYUI_MODELS_DIR:-/home/kade/ComfyUI/models}"

args=(
  # ⚠️ TODO: Benchmark...
  #--debiased_estimation_loss
  # ⚠️ TODO: What does this do? Does it even work?
  #--max_token_length=225
  --clip_g_dropout_rate=0.0
  --t5_dropout_rate=0.0
  --enable_scaled_pos_embed

  # Keep Tokens
  --keep_tokens=1
  --keep_tokens_separator="|||"

  # Model
  --pretrained_model_name_or_path="$COMFYUI_MODELS_DIR/checkpoints/sd3.5_large.safetensors"
  --clip_l="$COMFYUI_MODELS_DIR/clip/clip_l.safetensors"
  --clip_g="$COMFYUI_MODELS_DIR/clip/clip_g.safetensors"
  --t5xxl="$COMFYUI_MODELS_DIR/clip/t5xxl_fp16.safetensors"

  # Output, logging
  --log_with=tensorboard
  --seed=1728871242
  --fp8_base

  # Dataset
  --dataset_repeats=1
  --resolution="1024,1024"
  --enable_bucket
  --bucket_reso_steps=64
  --min_bucket_reso=128
  --max_bucket_reso=2048
  --flip_aug
  --shuffle_caption
  --cache_latents
  --cache_latents_to_disk
  --max_data_loader_n_workers=8
  --persistent_data_loader_workers

  # Network config
  --network_dim=64
  # ⚠️ TODO: Plot
  --network_alpha=0.0625
  --network_module="lycoris.kohya"
  # Each key=value below is a separate word following --network_args.
  --network_args
  "preset=full"
  "decompose_both=False"
  "rank_dropout=0"
  "module_dropout=0"
  "use_tucker=True"
  "use_scalar=False"
  "rank_dropout_scale=False"
  "algo=lokr"
  "bypass_mode=False"
  "factor=16"
  "dora_wd=True"
  "train_norm=False"
  --network_dropout=0

  # Optimizer config
  --optimizer_type=ClybW
  --train_batch_size=14
  #--gradient_accumulation_steps=1
  --max_grad_norm=1
  --gradient_checkpointing
  #--scale_weight_norms=1

  # LR Scheduling
  #--lr_warmup_steps=100
  # NOTE: 0.0004 if it's anything like FLUX.
  --learning_rate=0.0005
  --unet_lr=0.0002
  --text_encoder_lr=0.0001
  --lr_scheduler="cosine"
  --lr_scheduler_args="num_cycles=0.375"

  # Noise
  --multires_noise_iterations=12
  --multires_noise_discount=0.4
  #--min_snr_gamma=1

  # Optimization, details
  --no_half_vae
  --sdpa
  --mixed_precision="bf16"

  # Saving
  --save_model_as="safetensors"
  --save_precision="fp16"
  --save_every_n_steps=100

  # Saving States
  #--save_state

  # Either resume from a saved state
  #--resume="$OUTPUT_DIR/wolflink-vfucks400" # Resume from saved state
  #--skip_until_initial_step
  # Or from a checkpoint
  #--network_weights="$OUTPUT_DIR/wolflink-vfucks400/wolflink-vfucks400-step00000120.safetensors" # Resume from checkpoint (not needed with state, I think)
  #--initial_step=120

  # Sampling
  --sample_every_n_steps=25
  --sample_sampler="euler_a"
  --sample_at_first
  --caption_extension=".txt"
)

# ===== Environment Setup =====
source "$HOME/toolkit/zsh/train_functions.zsh"

# Setup variables and training arguments
setup_training_vars "$NAME"

# Fail fast when a variable interpolated below is missing: an empty value would
# silently produce root-relative paths such as "/$NAME" or "/config.toml".
# NOTE(review): these are presumably set by setup_training_vars; confirm
# against train_functions.zsh.
: "${OUTPUT_DIR:?OUTPUT_DIR must be set before building the output arguments}"
: "${TRAINING_DIR:?TRAINING_DIR must be set before building the dataset arguments}"
: "${STEPS:?STEPS must be set before building --max_train_steps}"

args+=(
  # Add the output and dataset arguments
  --output_dir="$OUTPUT_DIR/$NAME"
  --output_name="$NAME"
  --log_prefix="$NAME-"
  --logging_dir="$OUTPUT_DIR/logs"

  --max_train_steps="$STEPS"
  --dataset_config="$TRAINING_DIR/config.toml"
  #--train_data_dir="$TRAINING_DIR"
  --sample_prompts="$TRAINING_DIR/sample-prompts.txt"

  # Script arguments: anything passed on the command line goes last.
  "$@"
)

setup_conda_env "sdscripts"
LYCORIS_REPO=$(get_lycoris_repo)

# Set cleanup trap for both error and normal exit.
# NOTE(review): whether the script stops after a TERM depends on whether
# cleanup_empty_output itself exits; confirm that matches the intent.
trap cleanup_empty_output EXIT TERM

# Copies the script itself and the repositories' commit hashes to the output directory
store_commits_hashes "$SD_REPO" "$LYCORIS_REPO"

# ===== Run Training Script =====
run_training_script "$SD_REPO/$SD_SCRIPT" "${args[@]}"