# File size: 45,725 Bytes

# Configuration for Oh My Zsh and custom settings
# ----------------------------------------------
# 1. Source the custom git wrapper script
# 2. Set the path for Oh My Zsh installation
# 3. Define plugins for extended functionality:
#    - git: Provides aliases and functions for Git
#    - autojump: Enables quick navigation to frequently visited directories
#    - conda-env: Adds support for Conda environment management
# 4. Set the custom theme for the shell prompt

#display_git_help() {
#  echo "Git"
#  echo "---"
#  echo "ga: \`git add . && git commit -avs --verbose && git push\`"
#  echo "gc: \`git commit -avs --verbose\`"
#  echo "gcs: \`git clone --recurse-submodules\`"
#  echo "grh: \`git reset --hard\`"
#  echo "wd: \`git diff --word-diff-regex='[^,]+' --patience\`"
#  echo "gs: \`git status\`"
#  echo "gcx: \`git clean -fxd\`"
#}
#display_git_help

# Set NODE_ENV to "production" (conventionally read by Node.js tooling).
export NODE_ENV=production

# Select Qt's "offscreen" platform plugin so Qt applications can start on a
# machine with no display server (servers, CI/CD pipelines). Rendering happens
# offscreen, which suits automated testing and batch rendering where no user
# interaction is needed.
# NOTE(review): this applies to every Qt program launched from this shell,
# including ones that would normally open a window — confirm that is intended.
export QT_QPA_PLATFORM=offscreen

# Enable the experimental Just-In-Time (JIT) compiler for Python 3.13.
# This can improve performance by compiling Python code to machine code at runtime.
# Note (from the original author): the JIT is only available for x86_64 builds
# of Python in conda.
export PYTHON_JIT=1

# Load the custom git wrapper script
source $HOME/toolkit/zsh/git-wrapper.zsh

# Set the path to the Oh My Zsh installation directory
export ZSH="$HOME/.oh-my-zsh"

# Oh My Zsh plugins to load; must be assigned before oh-my-zsh.sh is sourced
plugins=(git autojump conda-env)

# Prompt theme; must be assigned before oh-my-zsh.sh is sourced
ZSH_THEME="kade"
# Optional Oh My Zsh switches, currently left disabled:
# CASE_SENSITIVE="true"
# HYPHEN_INSENSITIVE="true"
# DISABLE_MAGIC_FUNCTIONS="true"
# DISABLE_LS_COLORS="true"
# DISABLE_AUTO_TITLE="true"
# ENABLE_CORRECTION="true"
# COMPLETION_WAITING_DOTS="true"
# DISABLE_UNTRACKED_FILES_DIRTY="true"

# Set GIN_MODE to "release" (presumably for services built on the Gin web framework)
export GIN_MODE=release

# Set the system language and locale to Japanese UTF-8
# (LC_ALL overrides every other LC_* category as well as LANG)
export LANG=ja_JP.UTF-8
export LC_ALL=ja_JP.UTF-8
# Number of commands kept in the shell's history list
export HISTSIZE=500000
# Set the path to the ComfyUI installation
export COMFYUI_PATH="$HOME/ComfyUI"
# Print a backtrace when a Rust program panics (the value "full" gives the verbose form)
export RUST_BACKTRACE=1
# Opt out of .NET CLI telemetry data collection
export DOTNET_CLI_TELEMETRY_OPTOUT=1
# Opt out of telemetry in tools that honor DISABLE_TELEMETRY
export DISABLE_TELEMETRY=YES
# Enable color output in tools that check CLICOLOR.
# NOTE(review): 126 looks arbitrary; tools usually only test whether the
# variable is set/non-zero — confirm whether a specific value is needed.
export CLICOLOR=126

# TODO: benchmark the thread counts below; the value 24 has not been measured.

# Set the maximum number of threads for NumExpr library
# NumExpr is used for fast numerical array operations
# This setting can improve performance for multi-threaded NumPy operations
export NUMEXPR_MAX_THREADS=24 
# Set the maximum number of threads for Apple's Accelerate framework (VecLib)
# This affects performance of vector and matrix operations on macOS
# Note: This setting may not have an effect on non-macOS systems
export VECLIB_MAXIMUM_THREADS=24
# Set the number of threads for Intel Math Kernel Library (MKL)
# MKL is used for optimized mathematical operations, especially in NumPy
# This can significantly impact performance of linear algebra operations
export MKL_NUM_THREADS=24
# Set the number of threads for OpenMP
# OpenMP is used for parallel programming in C, C++, and Fortran
# This affects the performance of libraries and applications using OpenMP
export OMP_NUM_THREADS=24

# Disable parallelism for the Hugging Face Tokenizers library
# This can help prevent potential deadlocks or race conditions in multi-threaded environments
# It's particularly useful when using tokenizers in conjunction with DataLoader in PyTorch
# Setting this to false ensures more predictable behavior, especially in production environments
# However, it may slightly reduce performance in some scenarios where parallel tokenization is beneficial
export TOKENIZERS_PARALLELISM=false

# Source the broot launcher script for enhanced file navigation, if it exists.
# Guarded like the fzf line below so a missing broot install does not print an
# error on every shell start.
[ -f /home/kade/.config/broot/launcher/bash/br ] && source /home/kade/.config/broot/launcher/bash/br

# Source the fzf (Fuzzy Finder) configuration for zsh if it exists
# This enables fzf functionality in the shell, including keybindings and auto-completion
[ -f ~/.fzf.zsh ] && source ~/.fzf.zsh

# >>> conda initialize >>>
# !! Contents within this block are managed by 'conda init' !!
__conda_setup="$('/home/kade/miniconda3/bin/conda' 'shell.zsh' 'hook' 2> /dev/null)"
if [ $? -eq 0 ]; then
    eval "$__conda_setup"
else
    if [ -f "/home/kade/miniconda3/etc/profile.d/conda.sh" ]; then
        . "/home/kade/miniconda3/etc/profile.d/conda.sh"
    else
        export PATH="/home/kade/miniconda3/bin:$PATH"
    fi
fi
unset __conda_setup
# <<< conda initialize <<<

# Clear CONDA_CHANGEPS1 once conda's hook has run.
# NOTE(review): presumably so that prompt handling is left to the theme and the
# conda-env plugin rather than to conda itself — confirm.
unset CONDA_CHANGEPS1

# Load Oh My Zsh.
# oh-my-zsh.sh is read from $ZSH, which is exported near the top of this file
# together with the `plugins` array and ZSH_THEME; those must be assigned
# before this line. Once sourced, the configured theme, plugins and helper
# functions are available in the shell session.
source $ZSH/oh-my-zsh.sh

# Append extra tool directories to PATH. They are added after the existing
# entries, so anything already on PATH keeps precedence. In order:
# - dataset-tools release binaries from the user's repository
# - Rust's Cargo binary directory
# - Miniconda3 binary directory
# - the user's toolkit directory
# - Redis and PostgreSQL binary directories
# - the user's local bin directory
# - CUDA binary directory
PATH="$PATH:$HOME/source/repos/dataset-tools/target/x86_64-unknown-linux-gnu/release"
PATH="$PATH:$HOME/.cargo/bin"
PATH="$PATH:$HOME/miniconda3/bin"
PATH="$PATH:$HOME/toolkit"
PATH="$PATH:$HOME/db/redis-stable/src"
PATH="$PATH:$HOME/db/postgresql/bin"
PATH="$PATH:$HOME/.local/bin"
PATH="$PATH:/opt/cuda/bin"
export PATH

# Strip the base Miniconda directories from the search paths.
# Drops every PATH entry that contains $HOME/miniconda3/bin or
# $HOME/miniconda3/condabin, and every LD_LIBRARY_PATH entry that contains
# $HOME/miniconda3/lib, then re-exports both variables.
# Usage: rconda
rconda() {
    # grep -F compares fixed strings, so regex metacharacters in $HOME (for
    # example a dot) cannot match and drop unrelated entries.
    export PATH="$(printf '%s\n' "$PATH" | tr ':' '\n' \
        | grep -vF -e "$HOME/miniconda3/bin" -e "$HOME/miniconda3/condabin" \
        | tr '\n' ':' | sed 's/:$//')"
    export LD_LIBRARY_PATH="$(printf '%s\n' "$LD_LIBRARY_PATH" | tr ':' '\n' \
        | grep -vF -e "$HOME/miniconda3/lib" \
        | tr '\n' ':' | sed 's/:$//')"
}

# Extend LD_LIBRARY_PATH so the dynamic linker can find, at runtime:
# - the active Conda environment's library directory (only when CONDA_PREFIX is set)
# - the CUDA library directory for x86_64 Linux
# The ${VAR:+...} forms avoid two pitfalls of plain concatenation: an unset
# LD_LIBRARY_PATH would leave a leading empty entry (which the dynamic linker
# treats as the current directory), and an unset CONDA_PREFIX would add "/lib".
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}${CONDA_PREFIX:+$CONDA_PREFIX/lib:}/opt/cuda/targets/x86_64-linux/lib"

# Set the path for ComfyUI models
# NOTE(review): presumably read by ComfyUI-related tooling to locate model
# files; the path is hard-coded instead of derived from COMFYUI_PATH — confirm.
export COMFYUI_MODEL_PATH=/home/kade/ComfyUI/models

# Use the UPX executable compression tool from the local bin directory
alias upx='/home/kade/.local/bin/upx'
# Always colorize 'ls' output ("always" emits color codes even when the output is piped)
alias ls='ls --color=always'
# List all files in long format, including hidden files, with human-readable sizes and colorized output
alias ll="ls -lah --color=always"
# Use the 'reflink' option for 'cp' to enable copy-on-write when possible, improving efficiency
alias cp='cp --reflink=auto'
# Launch TensorBoard with the log directory set to the user's output_dir/logs
# ($HOME is expanded when the alias is defined, because of the double quotes)
alias t="tensorboard --logdir=$HOME/output_dir/logs"
# Edit tmux configuration, display a message, and reload the tmux configuration
alias rt="vim ~/.tmux.conf && echo \"Reloading tmux config\" && tmux source ~/.tmux.conf"
# Edit zsh configuration, display a message, and reload the zsh configuration
alias zr="vim ~/.zshrc && echo \"Reloading zsh config\" && source ~/.zshrc"

# The kanji 接 (せつ) [setsu] means "touch," "contact," "adjoin," or "piece together."
# It is used here to represent the action of "attaching" to an existing tmux session,
# as it conveys the idea of connecting or joining the session.
# To type the kanji 接 on a Japanese keyboard:
# 1. Switch your keyboard to Japanese input mode.
# 2. Type "setsu" (せつ) in hiragana.
# 3. Press the spacebar to convert it to the kanji 接.
alias 接="tmux attach"
# Alias for attaching to an existing tmux session
# 'ta' is a shorthand for 'tmux attach'
alias ta="tmux attach"

# Stage everything, commit (-a all tracked changes, -v show the diff in the
# editor, -s add a Signed-off-by trailer) and push to the remote
alias ga="git add . && git commit -avs && git push"
# Alias for checking the current status of the git repository
alias gs="git status"
# Alias for displaying word-level differences in git, using a custom regex and the patience algorithm
# (the regex treats every run of non-comma characters as one "word")
alias wd="git diff --word-diff-regex='[^,]+' --patience"
# Alias for using Neovim instead of Vim
alias vim="nvim"
# Another alias for using Neovim instead of Vim
alias vi="nvim"
# Short alias for quickly opening Neovim
alias v="nvim"
# Alias for resetting the git repository to the last commit, discarding all changes
alias grh='git reset --hard'
# Alias for cloning a git repository including all its submodules
alias gcs='git clone --recurse-submodules'
# Alias for running the Grabber-cli command
alias grabber="Grabber-cli"

# 'pie' is a shortcut for installing a Python package in editable mode
# using the pip command with the --use-pep517 option.
alias pie='pip install -e . --use-pep517'
# Same as 'pie', with ".[torch]" passed as an additional requirement
# (presumably the project's "torch" extra — confirm)
alias piet='pip install -e . --use-pep517 ".[torch]"'
# Alias for committing all tracked changes with a Signed-off-by trailer (-s)
# and verbose diff output in the commit message editor
alias gc="git commit -avs --verbose"
# Alias for displaying directory contents with colorized output
alias dir="dir --color=always"
# Alias for quickly reloading the zsh configuration file
alias rl="source ~/.zshrc"
# Alias for quickly editing and reloading the zsh configuration file
# (same effect as 'zr', without the status message)
alias ezc="nvim ~/.zshrc && source ~/.zshrc"

# Copy the shared sample-prompt template into every dataset directory.
# For each directory matching ~/datasets/by_*, copies
# ~/toolkit/kade-sample-prompts.txt to <directory>/sample-prompts.txt.
# NOTE(review): a second copy_sample_prompts is defined further down in this
# file and replaces this definition once the whole file has been sourced —
# confirm which of the two is meant to survive.
function copy_sample_prompts() {
    # Declared local so the loop variable does not leak into the interactive shell.
    local dir
    for dir in ~/datasets/by_*; do
        # Skips plain files, and the unexpanded pattern when nothing matches.
        [[ -d "$dir" ]] || continue
        cp ~/toolkit/kade-sample-prompts.txt "$dir/sample-prompts.txt"
    done
}

# Function: re (Resize SDXL LoRA)
# Description:
#   This function resizes an SDXL LoRA (Low-Rank Adaptation) model using the resize_lora.py script.
#   It applies a specific resizing recipe to reduce the size of the LoRA while maintaining its effectiveness.
#
# Usage:
#   re <target_file>
#
# Parameters:
#   $1 (target_file): Path to the input LoRA safetensors file to be resized.
#
# Actions:
#   1. Calls the resize_lora.py script with the following arguments:
#      - Verbose output (-vv)
#      - Custom resizing recipe (-r fro_ckpt=1,thr=-3.55)
#      - Path to the SDXL checkpoint file (ponyDiffusionV6XL_v6StartWithThisOne.safetensors)
#      - Path to the input LoRA file
#
# Recipe Explanation:
#   - fro_ckpt=1: Uses the Frobenius norm of the checkpoint layer as the score metric
#   - thr=-3.55: Sets a threshold for singular values at 10^-3.55 ≈ 0.000282 times the reference
#
# Notes:
#   - This function assumes the resize_lora.py script is located at ~/source/repos/resize_lora/
#   - The SDXL checkpoint file is expected to be in ~/ComfyUI/models/checkpoints/
#   - Output will be verbose (-vv) for detailed information during the resizing process
#   - The resized LoRA will be saved in the same directory as the script by default
function re() {
    # Local, so calling re does not leave target_file behind in the shell.
    local target_file="$1"

    # Fail early with a usage hint instead of passing an empty path to the script.
    if [[ -z "$target_file" ]]; then
        echo "Usage: re <target_file>" >&2
        return 1
    fi

    # Arguments: verbose output, the resize recipe, the SDXL checkpoint, then the LoRA to resize.
    python ~/source/repos/resize_lora/resize_lora.py -vv -r fro_ckpt=1,thr=-3.55 ~/ComfyUI/models/checkpoints/ponyDiffusionV6XL_v6StartWithThisOne.safetensors "$target_file"
}

# Move a tag to the front of every tag file below a directory.
# Searches <directory> recursively for *.tags files. In each file that contains
# <tag>, every occurrence of the tag is deleted and "<tag>, " is prepended to
# the first line. The separators that surrounded a removed occurrence are left
# in place (e.g. "a, tag, b" becomes "tag, a, , b").
# The tag is matched as literal text, so characters such as "/", "." or "&"
# are safe to use.
# Usage: rejiggle_tags <tag> <directory>
# Returns: 1 when an argument is missing.
rejiggle_tags() {
  local tag="$1"
  local dir="$2"
  local file tag_re tag_rep

  if [[ -z "$tag" || -z "$dir" ]]; then
    echo "Usage: rejiggle_tags <tag> <directory>"
    return 1
  fi

  # Escape the tag for use as a sed pattern and as a sed replacement, so it is
  # never interpreted as a regex or allowed to terminate the s/// command.
  tag_re=$(printf '%s' "$tag" | sed 's|[\\/.*^$[]|\\&|g')
  tag_rep=$(printf '%s' "$tag" | sed 's|[\\/&]|\\&|g')

  find "$dir" -type f -name "*.tags" | while IFS= read -r file; do
    if grep -qF -- "$tag" "$file"; then
      sed -i -e "s/$tag_re//g" -e "1s/^/$tag_rep, /" "$file"
    fi
  done
}

# Upgrade all packages in the conda environments listed by `conda env list`.
# Steps:
# 1. Takes the first column of `conda env list`, starting at the fourth line.
# 2. For each name: `conda activate`, `conda upgrade --all -y`, `conda deactivate`.
#    An environment that cannot be activated is reported and skipped, so the
#    upgrade never runs against whatever environment happened to be active.
# 3. Prints a completion message.
#
# NOTE(review): `tail -n +4` drops the first three lines. With the usual
# two-line header this also drops the first listed environment (typically
# "base") — confirm that skipping it is intended.
#
# Requires `conda` to be initialised in the current shell.
#
# Usage:
#   update_conda
update_conda() {
    local envs env

    # Get the list of conda environment names
    envs=$(conda env list | awk '{print $1}' | tail -n +4)

    # Read the names line by line. An unquoted $envs would not be split into
    # words by zsh, and reading from fd 3 keeps conda away from the list.
    while IFS= read -r env <&3; do
        [[ -n "$env" ]] || continue
        echo "Activating environment: $env"
        if ! conda activate "$env"; then
            echo "Could not activate environment: $env (skipped)" >&2
            continue
        fi
        echo "Upgrading all packages in environment: $env"
        conda upgrade --all -y
        conda deactivate
    done 3<<< "$envs"

    echo "All environments have been upgraded."
}

# Function: list_word_freqs
# Description:
#   Lists the most frequent words (tags) across the .txt files of a directory.
#
# Usage:
#   list_word_freqs <target_directory>
#
# Parameters:
#   - target_directory: The directory containing the text files to analyze
#     (not searched recursively).
#
# Functionality:
#   1. Streams all .txt files in the target directory into awk (no temporary
#      file is created, so nothing is left behind on error or interrupt).
#   2. For every whitespace-separated word: lowercases it, strips every
#      character that is not a-z, skips it if it is empty or in the ignore
#      list, and counts it otherwise.
#   3. Sorts the words by frequency in descending order.
#   4. Displays the top 40.
#
# Output:
#   Up to 40 lines in the format: <frequency> <word>
#
# Returns:
#   1 (after printing a usage line) when no directory is given.
#
# Note:
#   - The list of ignored words can be modified to suit specific needs.
list_word_freqs() {
    local target_dir=$1
    if [[ -z "$target_dir" ]]; then
        echo "Usage: list_word_freqs <target_directory>"
        return 1
    fi

    # Count the words of all text files, ignoring specific words
    cat "$target_dir"/*.txt | awk '
    BEGIN {
        ignore["a"]
        ignore["the"]
        ignore["and"]
        ignore["is"]
        ignore["with"]
        ignore["of"]
        ignore["in"]
        ignore["or"]
        ignore["on"]
        ignore["to"]
        ignore["has"]
        ignore["he"]
        ignore["from"]
    }
    {
        for (i = 1; i <= NF; i++) {
            word = tolower($i)
            gsub(/[^a-z]/, "", word)
            if (length(word) > 0 && !(word in ignore)) {
                freq[word]++
            }
        }
    }
    END {
        for (word in freq) {
            print freq[word], word
        }
    }
    ' | sort -nr | head -n 40
}

# Function: sample_prompts
# Description:
#   Prints the contents of every .txt file directly inside a directory, giving
#   a quick overview of the tags used for training.
#
# Usage: sample_prompts <target_directory>
#
# Parameters:
#   - target_directory: The directory containing the .txt files with tags.
#
# Output:
#   The contents of each .txt file, each followed by two newline characters.
#   Nothing is printed when the directory contains no .txt files.
sample_prompts() {
    local target_directory="$1"
    local file  # local, so the loop variable does not leak into the shell

    for file in "$target_directory"/*.txt; do
        # Skip the unexpanded pattern (no match) and anything that is not a file.
        [[ -f "$file" ]] || continue
        cat "$file"
        printf '\n\n'
    done
}

# _replace_comma_with_keep_tags_in_files (private helper)
# Description: Rewrites every file below <target_directory> whose name matches
#              <name_pattern>. On each line, the whitespace-separated fields
#              that contain a comma are counted; in the field whose count
#              equals <occurrence_number>, every comma is replaced with " |||".
#              Lines are re-joined with single spaces by awk.
# Usage: _replace_comma_with_keep_tags_in_files <occurrence_number> <target_directory> <name_pattern>
_replace_comma_with_keep_tags_in_files() {
  local occurrence_number=$1
  local target_directory=$2
  local name_pattern=$3
  local file tmp_file

  find "$target_directory" -type f -name "$name_pattern" | while IFS= read -r file; do
    # A private temp file instead of a fixed "tmpfile" in the current
    # directory, which would clobber an unrelated file of that name.
    tmp_file=$(mktemp) || return 1
    if awk -v occurrence="$occurrence_number" '{
      count = 0
      for (i = 1; i <= NF; i++) {
        if ($i ~ /,/) {
          count++
          if (count == occurrence) {
            gsub(/,/, " |||", $i)
          }
        }
      }
      print
    }' "$file" > "$tmp_file"; then
      # Write back in place so the file keeps its permissions.
      cat "$tmp_file" > "$file"
    fi
    rm -f "$tmp_file"
  done
}

# replace_comma_with_keep_tags_txt
# Description: Replaces the specified occurrence of a comma with " |||" in all *.txt files
#              in all subdirectories of a target directory or the current directory when no path is passed.
# Usage: replace_comma_with_keep_tags_txt <occurrence_number> [target_directory]
# Parameters:
#   - occurrence_number: The occurrence number of the comma to be replaced (e.g., 1 for the first occurrence).
#   - target_directory (optional): The target directory to search for *.txt files. If not provided, the current directory is used.
# Returns: 1 when occurrence_number is missing.
replace_comma_with_keep_tags_txt() {
  local occurrence_number=$1
  local target_directory=${2:-.}

  if [[ -z "$occurrence_number" ]]; then
    echo "Error: occurrence_number is required."
    return 1
  fi

  _replace_comma_with_keep_tags_in_files "$occurrence_number" "$target_directory" "*.txt"
}

# replace_comma_with_keep_tags
# Description: Replaces the specified occurrence of a comma with " |||" in all *.tags files
#              in all subdirectories of a target directory or the current directory when no path is passed.
# Usage: replace_comma_with_keep_tags <occurrence_number> [target_directory]
# Parameters:
#   - occurrence_number: The occurrence number of the comma to be replaced (e.g., 1 for the first occurrence).
#   - target_directory (optional): The target directory to search for *.tags files. If not provided, the current directory is used.
# Returns: 1 when occurrence_number is missing.
# Example:
#   replace_comma_with_keep_tags 2 /path/to/directory
#   replace_comma_with_keep_tags 1
replace_comma_with_keep_tags() {
  local occurrence_number=$1
  local target_directory=${2:-.}

  if [[ -z "$occurrence_number" ]]; then
    echo "Error: occurrence_number is required."
    return 1
  fi

  _replace_comma_with_keep_tags_in_files "$occurrence_number" "$target_directory" "*.tags"
}

# Function: display_custom_help
# Description:
#   Prints a quick-reference help menu: the output of `conda env list`,
#   followed by launch commands for LLM tooling, taggers/captioners and
#   databases, and a table of the aliases and custom functions of this setup.
# Usage:
#   display_custom_help
# Output:
#   The help text on stdout. $HOME in the open-webui line is expanded when the
#   function runs; the pg_ctl line prints a literal "$HOME".
display_custom_help() {
    echo "----------------------------------------------------------------------------------------------------------------------"
    printf "%s\n" "$(conda env list)"
    echo "----------------------------------------------------------------------------------------------------------------------"
    echo "LLMs"
    echo "---"
    echo "conda activate openwebui && PORT=6969 $HOME/source/repos/open-webui/backend/start.sh"
    #echo "conda activate openwebui && open-webui serve --port 6969"
    echo "ollama serve"
    echo "----------------------------------------------------------------------------------------------------------------------"
    echo "Taggers + Captioners"
    echo "----------------------------------------------------------------------------------------------------------------------"
    echo "gallery-dl --cookies-from-browser firefox https://x.com/whatever"
    echo "JTP2"
    echo "---"
    echo "~/toolkit/jtp2 <dir>"
    echo "Joy Captioner"
    echo "---"
    echo "~/source/repos/joy/joy <dir> --custom_prompt \"<prompt>\" --caption_type custom"
    echo "Waifu Diffusion Tagger:"
    echo "---"
    echo "python ~/source/repos/wdv3-timm/wdv3_timm.py <dir> --model eva02"
    echo "----------------------------------------------------------------------------------------------------------------------"
    echo "Database Stuff"
    echo "----------------------------------------------------------------------------------------------------------------------"
    echo "Redis"
    echo "---"
    echo "~/db/redis-stable/src/redis-server      : Start server."
    echo "PostgreSQL"
    echo "---"
    echo "psql -d postgres -h /tmp                : Connect using socket directory."
    echo "Start server:"
    echo "pg_ctl -D \$HOME/db/postgresql/data -l \$HOME/db/pgsql.log start"
    echo "----------------------------------------------------------------------------------------------------------------------"
    echo "Commands, Aliases, and Custom Functions:"
    echo "----------------------------------------------------------------------------------------------------------------------"
    echo "pie                                         : \`pip install -e . --use-pep517\`"
    echo "gcs                                         : \`git clone --recurse-submodules\`"
    echo "dust                                        : A more intuitive version of du."
    echo "ranger                                      : A vim inspired file manager."
    echo "htop                                        : Interactive process viewer."
    echo "nvtop                                       : Interactive GPU process viewer."
    echo "nvitop                                      : An even more interactive GPU process viewer."
    # The alias points from vim to nvim, not the other way round.
    echo "vim                                         : Alias for nvim."
    echo "rt                                          : Edit tmux config and reload it."
    echo "zr                                          : Edit zsh config and reload it."
    echo "ta                                          : Attach to tmux session."
    echo "ga                                          : Git add, commit, and push."
    echo "gs                                          : Git status."
    echo "wd                                          : Word diff in git."
    echo "grabber                                     : Alias for Grabber-cli."
    echo "ls                                          : Alias for 'ls --color=always'."
    echo "ll                                          : List all files in long format, including hidden files, with human-readable sizes and colorized output."
    echo "cp                                          : Use the 'reflink' option for 'cp' to enable copy-on-write when possible, improving efficiency."
    echo "t                                           : Launch TensorBoard with the log directory set to the user's output_dir/logs."
    echo "接                                          : Attach to an existing tmux session."
    echo "gc                                          : Create a signed, verbose git commit."
    echo "dir                                         : Display directory contents with colorized output."
    echo "ezc                                         : Quickly edit and reload the zsh configuration file."
    echo "nv                                          : Returns the CUDA version number."
    echo "remove_repetition                           : Removes repetition in txt files in a target directory."
    echo "copy_sample_prompts                         : Copies ./sample-prompts.txt file from the current dir to datasets/furry."
    echo "remove_number_prefix                        : Removes all numbers prefixed by a _ from the end of every file."
    echo "count_captions                              : Counts *.caption and *.txt files in each subdirectory."
    echo "count_captions_per_folder                   : Counts *.caption and *.txt files in each subdirectory individually."
    echo "copy_matching_caption_files                 : Copies matching .caption files for <dir> to the current directory."
    echo "c                                           : Change to ComfyUI directory and start the server."
    echo "png2mp4                                     : Convert PNG sequence to MP4 video."
    echo "seed <file>                                 : Display the seed from a safetensors file."
    echo "swch <branch>                               : Clean repo and switch to specified git branch."
    echo "convert_to_jxl <directory>                  : Convert JPG, JPEG, and PNG files to JXL in the specified directory."
    echo "convert_pxl_to_png <directory>              : Convert PXL files to PNG in the specified directory."
    echo "replace_text_in_files [dir] <src> <replace> : Perform text replacement on *.txt files in a target directory."
    echo "update_dir [directory]                      : Update git repositories in subdirectories."
    echo "inject_to_captions [dir] \"txt\"              : Add prefix to the beginning of each text file in a directory."
    echo "chop_lora <input_file>                      : Generate multiple versions of a Lora file with different presets."
    echo "----------------------------------------------------------------------------------------------------------------------"
}

# Print the CUDA release reported by `nvcc --version`, without the dot.
# The "release X.Y" number is extracted from nvcc's output and the dot is
# removed (e.g. 12.6 is printed as 126).
# Usage: nv
# Output: the compacted version number on stdout.
# Returns: nvcc's exit status when nvcc fails or is not installed; 1 when no
#          release number is found in its output.
nv() {
    local nvcc_output version

    # Assigned separately from `local` so a failing nvcc is not masked.
    nvcc_output=$(nvcc --version) || return

    # Extract the version number (e.g. 12.6); \K discards the "release " prefix.
    version=$(printf '%s\n' "$nvcc_output" | grep -oP 'release \K[0-9]+\.[0-9]+')
    [[ -n "$version" ]] || return 1

    # Remove the dot (12.6 -> 126) and print the result
    printf '%s\n' "$version" | tr -d '.'
}

# Trim repeated words from the end of each line in the .txt files of a directory.
# Every *.txt file below <directory> (searched recursively) is rewritten line
# by line: when a line ends in a run of identical words, the run is reduced to
# a single word. Repetition elsewhere in the line is left alone. Words are
# re-joined with single spaces, and lines without any word are dropped.
# Usage: remove_repetition <directory>
# Returns: 1 when no directory is given.
remove_repetition() {
  local dir=$1  # The directory to search for text files
  local file

  if [[ -z "$dir" ]]; then
    echo "Usage: remove_repetition <directory>" >&2
    return 1
  fi

  # IFS= keeps leading and trailing whitespace in the paths reported by find.
  find "$dir" -type f -name "*.txt" | while IFS= read -r file; do
    awk '
    {
      n = split($0, words, " ")  # Split the line into words
      for (i = n; i > 1; i--) {  # Walk backwards from the last word
        if (words[i] != words[i-1]) break  # Stop where the trailing run of equal words ends
      }
      for (j = 1; j <= i; j++) {  # Print everything up to the first word of that run
        printf "%s%s", words[j], (j == i ? ORS : OFS)
      }
    }
    ' "$file" > "${file}.tmp" && mv "${file}.tmp" "$file"
  done
}

# Files the sample prompt file of the current directory under ~/datasets/furry.
# ./sample-prompts.txt is copied (the original stays in place) to
#   ~/datasets/furry/sample_prompts/pony/     when it matches the regex 'score_*'
#   ~/datasets/furry/sample_prompts/compass/  otherwise
# cp runs with -v, so the copy is reported on stdout.
# NOTE(review): as a regex, 'score_*' means "score" followed by zero or more
# underscores, so a bare "score" matches too — confirm whether 'score_' was meant.
# Returns: 1 when ./sample-prompts.txt is missing or the copy fails.
copy_sample_prompts() {
    local file="./sample-prompts.txt"
    local dest

    if [[ ! -f "$file" ]]; then
        echo "copy_sample_prompts: $file not found" >&2
        return 1
    fi

    if grep -q 'score_*' "$file"; then
        dest="$HOME/datasets/furry/sample_prompts/pony/"
    else
        dest="$HOME/datasets/furry/sample_prompts/compass/"
    fi

    # Only report success when the copy actually happened.
    cp -v "$file" "$dest" || return 1

    echo "File has been organized."
}

# Strip a trailing "_<digits>" from file names, just before the extension.
# Works on the current directory and its subdirectories (hidden files and
# directories are skipped): "img_12.png" becomes "img.png".
# Only names of the exact form <base>_<digits>.<ext> are renamed, so a name
# such as "take_1final.txt" is left alone. A file is never renamed onto an
# existing one; such files are reported on stderr and skipped.
# Usage: remove_number_prefix
remove_number_prefix() {
  local file dir_part name stem ext digits base new_file

  find . -type f -name '*_[0-9]*.*' ! -path '*/.*' | while IFS= read -r file; do
    dir_part=${file%/*}
    name=${file##*/}
    ext=${name##*.}      # text after the last dot
    stem=${name%.*}      # name without that extension
    digits=${stem##*_}   # text after the last underscore of the stem
    base=${stem%_*}

    # Rename only when the suffix consists of digits and a base name remains.
    case "$digits" in
      ''|*[!0-9]*) continue ;;
    esac
    [[ -n "$base" ]] || continue

    new_file="$dir_part/$base.$ext"
    if [[ -e "$new_file" ]]; then
      echo "remove_number_prefix: skipping $file: $new_file already exists" >&2
      continue
    fi

    mv -- "$file" "$new_file"
  done
}

# Counts all *.caption and *.txt files in the current directory and all of
# its subdirectories, and prints one total per file type.
# Usage: count_captions
count_captions() {
  # Local, so the counters do not linger in the interactive shell.
  local caption_count txt_count

  caption_count=$(find . -type f -name "*.caption" | wc -l)
  txt_count=$(find . -type f -name "*.txt" | wc -l)
  echo "*.caption files: $caption_count"
  echo "*.txt files: $txt_count"
}

# Counts *.caption and *.txt files in each subdirectory individually.
# For every directory directly inside the current one, prints its name and the
# number of *.caption and *.txt files found below it (recursively).
# Prints nothing when the current directory has no subdirectories.
# Usage: count_captions_per_folder
count_captions_per_folder() {
  local dir  # local, so the loop variable does not leak into the shell

  for dir in */; do
    # Skip the unexpanded pattern left behind when there are no subdirectories.
    [[ -d "$dir" ]] || continue
    echo "Directory: $dir"
    echo -n "*.caption files: "
    find "$dir" -type f -name "*.caption" | wc -l
    echo -n "*.txt files: "
    find "$dir" -type f -name "*.txt" | wc -l
  done
}

# Start Open WebUI on port 6969 from the "openwebui" conda environment.
# Usage: oui
# Returns: the status of `conda activate` when the environment cannot be
#          activated (the server is then not started); otherwise the status of
#          `open-webui serve`.
oui() {
  # Do not launch from whatever environment happens to be active.
  conda activate openwebui || return
  open-webui serve --port 6969
}

# Copy the .caption files that belong to the images in the current directory.
# For every image file here (jpg, jpeg, png, gif, bmp, tiff, webp, jxl), looks
# for <target_dir>/<image name without extension>.caption and copies it into
# the current directory. One line is printed per image, saying whether a
# caption was copied or none was found.
# Usage: copy_matching_caption_files <target_dir>
# Returns: 1 when no target directory is given.
copy_matching_caption_files() {
  # The directory that holds the .caption files. All working variables are
  # local so nothing leaks into the interactive shell.
  local target_dir="$1"
  local image_file base_name caption_file

  if [[ -z "$target_dir" ]]; then
    echo "Usage: copy_matching_caption_files <target_dir>" >&2
    return 1
  fi

  for image_file in *; do
    # Keep only the supported image extensions. A case pattern is used here
    # because it behaves the same in zsh and bash.
    case "$image_file" in
      *.jpg|*.jpeg|*.png|*.gif|*.bmp|*.tiff|*.webp|*.jxl) ;;
      *) continue ;;
    esac
    [[ -f "$image_file" ]] || continue

    # Base name without the extension, and the caption expected for it
    base_name="${image_file%.*}"
    caption_file="$target_dir/$base_name.caption"

    if [[ -f "$caption_file" ]]; then
      cp "$caption_file" .
      echo "Copied $caption_file to the current directory."
    else
      echo "No matching .caption file for $image_file."
    fi
  done
}

# Replace a text string in all .txt files directly inside a directory.
# Arguments:
#   1. target_dir:   The directory containing the .txt files to edit.
#   2. search_text:  The text to look for. It is matched literally, so
#                    characters such as "/", "." or "*" have no special meaning.
#   3. replace_text: The text that replaces every occurrence of search_text.
#                    Inserted literally as well ("&" and "\" are not expanded).
#
# Every file is edited in place with sed. When all files have been processed a
# completion message is printed.
# Returns: 1 when target_dir or search_text is missing.
# Example usage:
# replace_text_in_files "/path/to/directory" "squishy (artist)" "by squishy (artist)"
replace_text_in_files() {
  local target_dir=$1
  local search_text=$2
  local replace_text=$3
  local file search_re replace_esc

  if [[ -z "$target_dir" || -z "$search_text" ]]; then
    echo "Usage: replace_text_in_files <target_dir> <search_text> <replace_text>" >&2
    return 1
  fi

  # Escape both strings for sed so they cannot act as a regex, reference the
  # match ("&") or terminate the s/// command ("/").
  search_re=$(printf '%s' "$search_text" | sed 's|[\\/.*^$[]|\\&|g')
  replace_esc=$(printf '%s' "$replace_text" | sed 's|[\\/&]|\\&|g')

  # Loop through all .txt files in the target directory
  for file in "$target_dir"/*.txt; do
    # Skip the unexpanded pattern when the directory has no .txt files.
    [[ -f "$file" ]] || continue
    sed -i "s/$search_re/$replace_esc/g" "$file"
  done

  echo "Text replacement complete in $target_dir!"
}

# _inject_prefix_into_files (private helper)
# Puts <prefix> at the front of every "*.<extension>" file directly inside
# <directory>.
#   - File already contains the prefix: the first occurrence on each line is
#     removed, "<prefix>, " is prepended to the first line, and leftover
#     separators (",,", ", ,", " ,", a leading ", ") are tidied up.
#   - Otherwise: "<prefix>, " is prepended to the file content.
# The prefix is matched and inserted literally (safe with "/", "." or "&").
# Files are rewritten in place, so they keep their permissions.
# Usage: _inject_prefix_into_files <directory> <prefix> <extension>
# Returns: 1 when the prefix is empty or the directory does not exist.
_inject_prefix_into_files() {
    local dir="$1"
    local prefix="$2"
    local extension="$3"
    local file temp_file prefix_re prefix_rep

    # An empty prefix would match every file and produce an invalid sed command.
    if [[ -z "$prefix" ]]; then
        echo "Usage: inject_to_${extension} <directory> <prefix>" >&2
        return 1
    fi

    if [[ ! -d "$dir" ]]; then
        echo "Directory $dir does not exist."
        return 1
    fi

    # Escape the prefix for use as a sed pattern and as a sed replacement.
    prefix_re=$(printf '%s' "$prefix" | sed 's|[\\/.*^$[]|\\&|g')
    prefix_rep=$(printf '%s' "$prefix" | sed 's|[\\/&]|\\&|g')

    for file in "$dir"/*."$extension"; do
        [[ -f "$file" ]] || continue
        temp_file=$(mktemp) || return 1

        if grep -qF -- "$prefix" "$file"; then
            # Move the existing prefix to the front without leaving extra commas or spaces
            sed -e "s/$prefix_re//" \
                -e "1s/^/$prefix_rep, /" \
                -e 's/^, //' \
                -e 's/,,/,/g' \
                -e 's/, ,/,/g' \
                -e 's/ ,/,/g' "$file" > "$temp_file"
            cat "$temp_file" > "$file"
            echo "Moved '${prefix}' to the front of $file"
        else
            printf '%s, %s\n' "$prefix" "$(cat "$file")" \
                | sed -e 's/,,/,/g' -e 's/, ,/,/g' -e 's/ ,/,/g' > "$temp_file"
            cat "$temp_file" > "$file"
            echo "Added '${prefix}, ' to the front of $file"
        fi

        rm -f "$temp_file"
    done
}

# Adds a prefix to the beginning of each .txt file in a directory; a prefix
# that is already present is moved to the front instead.
# Usage: inject_to_txt <directory> <prefix>
# Arguments:
#   <directory> - The directory containing the .txt files to be modified.
#   <prefix>    - The prefix to be added to the beginning of each file.
# Returns: 1 when the prefix is empty or the directory does not exist.
inject_to_txt() {
    _inject_prefix_into_files "$1" "$2" "txt"
}

# Adds a prefix to the beginning of each .tags file in a directory; a prefix
# that is already present is moved to the front instead.
# Usage: inject_to_tags <directory> <prefix>
# Arguments:
#   <directory> - The directory containing the .tags files to be modified.
#   <prefix>    - The prefix to be added to the beginning of each file.
# Returns: 1 when the prefix is empty or the directory does not exist.
inject_to_tags() {
    _inject_prefix_into_files "$1" "$2" "tags"
}

# Function: update_dir
# Description:
#   Runs `git pull` in every immediate subdirectory of a directory that
#   contains a `.git` directory.
#
# Usage:
#   update_dir [<target_dir>]
#
# Parameters:
#   $1 - Directory whose subdirectories are scanned (optional, default: .)
#
# Notes:
#   - Hidden subdirectories are not visited (the `*/` glob skips them).
#   - The caller's working directory is left untouched; each pull runs in a subshell.
update_dir() {
    local target_dir="${1:-.}"
    local dir

    if [[ ! -d "$target_dir" ]]; then
        echo "Directory $target_dir does not exist." >&2
        return 1
    fi

    # In zsh an unmatched glob is an error by default; make it expand to nothing
    # for the duration of this function only, so a directory without
    # subdirectories is simply a no-op. The command is never run in bash.
    [[ -n "${ZSH_VERSION:-}" ]] && setopt local_options null_glob

    for dir in "$target_dir"/*/; do
        # Only directories that hold a .git directory are treated as repositories.
        # This also skips the literal pattern bash leaves behind when nothing matches.
        [[ -d "$dir/.git" ]] || continue
        (
            cd "$dir" || exit 1
            echo "Updating $(pwd)"
            git pull
        )
    done
}

# Function: chop_lora
# Description:
#   This function processes a LoRA (Low-Rank Adaptation) model file by selectively
#   keeping or removing specific layers based on predefined presets. It uses the
#   chop_blocks.py script to perform the actual layer manipulation.
#
# Usage:
#   chop_lora <input_file>
#
# Parameters:
#   $1 - The input LoRA model file (typically a .safetensors file)
#
# Presets:
#   The function defines several presets, each represented by a 21-digit binary string:
#   - ringdingding: This vector string was used for the Stoat LoRA.
#   - squeaker: I really have no idea what this is.
#   - heavylifter: Keeps only one specific layer that seems to learn the most.
#   - style1 and style2: Different configurations for style transfer
#   - beeg: A configuration that keeps only the largest layers.
#   - all: Keeps all layers
#   - allin: Keeps only the input layers
#   - allmid: Keeps only the middle layers
#   - allout: Keeps only the output layers
#
# Actions:
#   1. Extracts the base name of the input file (without extension)
#   2. Iterates through each preset
#   3. For each preset, generates an output filename and runs the chop_blocks.py script
#   4. The script creates a new LoRA file with only the specified layers retained
#
# Output:
#   Creates multiple output files, one for each preset, named as:
#   "<base_name>-<preset_name>.safetensors"
#
# Notes:
#   - Requires the chop_blocks.py script to be located at ~/source/repos/resize_lora/chop_blocks.py
#   - The binary strings represent which layers to keep (1) or remove (0)
#   - This function allows for quick generation of multiple variants of a LoRA model,
#     each emphasizing different aspects or effects
chop_lora() {
    # Runs chop_blocks.py once per preset, writing
    # "<input without extension>-<preset>.safetensors" for each of them.
    # Returns 1 when no input file is given, otherwise the status of the last
    # failing chop_blocks.py run (0 when every run succeeded).
    local input_file="$1"
    local base_name="$input_file"
    local spec preset vector_string output_file
    local rc=0

    if [[ -z "$input_file" ]]; then
        echo "Usage: chop_lora <input_file>" >&2
        return 1
    fi

    # Remove the extension, but only when the file name itself has one; a dot
    # in a parent directory name must not be mistaken for it.
    case "${input_file##*/}" in
        ?*.*) base_name="${input_file%.*}" ;;
    esac

    # Presets as "name=vector" pairs. A plain list keeps the processing order
    # fixed and avoids the associative-array initialiser, which only works
    # when there is no whitespace around "=".
    local -a preset_specs
    preset_specs=(
        "ringdingding=1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0"
        "squeaker=1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0"
        "heavylifter=1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0"
        "style1=1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0"
        "style2=1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0"
        "beeg=1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,0"
        "all=1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1"
        "allin=1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0"
        "allmid=1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0"
        "allout=1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1"
    )

    for spec in "${preset_specs[@]}"; do
        preset="${spec%%=*}"
        vector_string="${spec#*=}"
        output_file="${base_name}-${preset}.safetensors"
        echo "Generating $output_file"
        # Keep going after a failure so the remaining presets are still produced.
        python ~/source/repos/resize_lora/chop_blocks.py "$input_file" "$vector_string" -o "$output_file" || rc=$?
    done

    return "$rc"
}

# Function cs1
# This function chops blocks from an SDXL LoRA's safetensors file to preserve the style information only.
# It uses a specific block configuration and saves the output with a modified filename.
cs1() {
    # Arguments:
    #   $1 - path to the target .safetensors file
    # Output file (written relative to the current directory):
    #   <name without version/step><version/step>c1.safetensors
    # Returns 1 when no file is given, otherwise the status of chop_blocks.
    local target_safetensors=$1
    local base_name version_step base_name_no_version output_safetensors

    if [[ -z "$target_safetensors" ]]; then
        echo "Usage: cs1 <file.safetensors>" >&2
        return 1
    fi

    # Base name of the target file (without directory and .safetensors extension)
    base_name=$(basename -- "$target_safetensors" .safetensors)

    # Version and step string (e.g., v1s400). At least one digit is required
    # after both "v" and "s" so that a plain "vs" inside a word is not picked
    # up, and only the first match is used.
    version_step=$(printf '%s\n' "$base_name" | grep -o 'v[0-9][0-9]*s[0-9][0-9]*' | head -n 1)

    # Remove the version and step string from the base name to avoid duplication.
    # Without such a string the name is kept as it is instead of being emptied.
    base_name_no_version=$base_name
    if [[ -n "$version_step" ]]; then
        # version_step consists of "v", "s" and digits only, so it is safe as a pattern.
        base_name_no_version=${base_name/$version_step/}
    fi

    # Append c1 to the version and step string
    output_safetensors="${base_name_no_version}${version_step}c1.safetensors"

    # Run the chop_blocks command with the specified block configuration and output filename
    ~/toolkit/chop_blocks "$target_safetensors" 1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0 -o "$output_safetensors"
}

# Function cs2
# This function chops blocks from an SDXL LoRA's safetensors file to preserve the style information only.
# It uses a different block configuration and saves the output with a modified filename.
cs2() {
    # Arguments:
    #   $1 - path to the target .safetensors file
    # Output file (written relative to the current directory):
    #   <name without version/step><version/step>c2.safetensors
    # Returns 1 when no file is given, otherwise the status of chop_blocks.
    local target_safetensors=$1
    local base_name version_step base_name_no_version output_safetensors

    if [[ -z "$target_safetensors" ]]; then
        echo "Usage: cs2 <file.safetensors>" >&2
        return 1
    fi

    # Base name of the target file (without directory and .safetensors extension)
    base_name=$(basename -- "$target_safetensors" .safetensors)

    # Version and step string (e.g., v1s400). At least one digit is required
    # after both "v" and "s" so that a plain "vs" inside a word is not picked
    # up, and only the first match is used.
    version_step=$(printf '%s\n' "$base_name" | grep -o 'v[0-9][0-9]*s[0-9][0-9]*' | head -n 1)

    # Remove the version and step string from the base name to avoid duplication.
    # Without such a string the name is kept as it is instead of being emptied.
    base_name_no_version=$base_name
    if [[ -n "$version_step" ]]; then
        # version_step consists of "v", "s" and digits only, so it is safe as a pattern.
        base_name_no_version=${base_name/$version_step/}
    fi

    # Append c2 to the version and step string
    output_safetensors="${base_name_no_version}${version_step}c2.safetensors"

    # Run the chop_blocks command with the specified block configuration and output filename
    ~/toolkit/chop_blocks "$target_safetensors" 1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0 -o "$output_safetensors"
}

# Function: swch (Switch Git Branch)
# Description:
#   This function facilitates switching between Git branches while ensuring a clean working directory.
#
# Usage:
#   swch <branch_name>
#
# Parameters:
#   $1 - The name of the branch to switch to.
#
# Actions:
#   1. Checks if a branch name is provided.
#   2. Cleans the working directory, removing untracked files and directories.
#   3. Pulls the latest changes from the remote repository.
#   4. Checks out the specified branch.
#
# Notes:
#   - Use with caution as 'git clean -fxd' will remove all untracked files and directories.
#   - Ensure all important changes are committed or stashed before using this function.
swch() {
    if [ -z "$1" ]; then
        echo "Please provide a branch name."
        return 1
    fi
    branchname=$1
    git clean -fxd && git pull && git checkout $branchname
}

# Function: extract_iframes
# Description:
#   This function extracts I-frames from a video file using ffmpeg.
#
# Usage:
#   extract_iframes <input_file> [<scene_change_fraction>]
#
# Parameters:
#   $1 - The input video file (required)
#   $2 - The scene change fraction threshold (optional, default: 0.1)
#
# Actions:
#   1. Assigns input arguments to variables
#   2. Extracts the base filename without extension
#   3. Runs ffmpeg to extract I-frames based on the scene change threshold
#   4. Saves extracted frames as PNG files with sequential numbering
#
# Notes:
#   - Requires ffmpeg to be installed and accessible via /usr/bin/ffmpeg
#   - Output files will be named as "<base_name>-XXXXXX.png" in the current directory
extract_iframes() {
    # Returns 1 when no input file is given, otherwise the status of ffmpeg.
    # All variables are local so nothing leaks into the interactive shell.
    local input_file="$1"
    local scene_change_fraction="${2:-0.1}"
    local base_name

    if [[ -z "$input_file" ]]; then
        echo "Usage: extract_iframes <input_file> [<scene_change_fraction>]" >&2
        return 1
    fi

    # Base filename without directory and without extension. Any extension is
    # removed (not only .webm); names without one, and dot-files, are kept whole.
    base_name="${input_file##*/}"
    case "$base_name" in
        ?*.*) base_name="${base_name%.*}" ;;
    esac

    # Keep frames that are I-frames AND whose scene score exceeds the threshold;
    # one numbered PNG is written per selected frame.
    /usr/bin/ffmpeg -i "$input_file" -f image2 -vf "select=eq(pict_type\,PICT_TYPE_I)*gt(scene\,$scene_change_fraction),showinfo" -fps_mode vfr "${base_name}-%06d.png"
}

# Function: seed
# Description:
#   This function extracts the seed value from a LoRA (Low-Rank Adaptation) model's metadata.
#
# Usage:
#   seed <file_path>
#
# Parameters:
#   $1 - The path to the LoRA model file (usually a .safetensors file)
#
# Actions:
#   1. Takes the file path as an argument
#   2. Uses Python to read the safetensors file
#   3. Extracts the metadata from the file
#   4. Attempts to retrieve the 'ss_seed' value from the metadata
#   5. Prints the seed value if found, or 'Not found' if not present
#
# Notes:
#   - Requires Python 3 with the 'safetensors' module installed
#   - The seed is typically used to reproduce the exact training conditions of the LoRA
#   - If the seed is not found, it may indicate the LoRA was created without recording this information
seed() {
    # Prints the 'ss_seed' metadata value of a safetensors file, or
    # 'Not found' when the file carries no such entry.
    # Returns 1 when no path is given, otherwise the status of python3.
    local filePath="$1"

    if [[ -z "$filePath" ]]; then
        echo "Usage: seed <file_path>" >&2
        return 1
    fi

    # The path is handed over as an argument (sys.argv[1]) rather than pasted
    # into the Python source, so quotes or backslashes in it cannot break or
    # alter the script.
    python3 -c '
import json
import sys

import safetensors

with safetensors.safe_open(sys.argv[1], "np") as handle:
    # metadata() may yield nothing at all for files without a metadata section.
    metadata = handle.metadata() or {}

raw = metadata.get("ss_seed")
if raw is None:
    print("Not found")
else:
    try:
        # Decode the stored value as JSON, as before ...
        print(json.loads(raw))
    except (TypeError, ValueError):
        # ... and fall back to the raw value when it is not valid JSON.
        print(raw)
' "$filePath"
}

# Load further helper scripts from the toolkit checkout. Each file is sourced
# into the current shell, so whatever it defines becomes available from here on.
# NOTE(review): the contents of these files are not visible from this file; going
# by their names they cover member installation, gallery-dl helpers and PNG-to-MP4
# conversion — confirm before relying on that.
source ~/toolkit/zsh/install_members.zsh
source ~/toolkit/zsh/gallery-dl.zsh
source ~/toolkit/zsh/png2mp4.zsh

# Function: c
# Description:
#   This function launches ComfyUI with specific settings tailored to the user's preferences.
#
# Usage:
#   c
#
# Actions:
#   1. Changes directory to ~/ComfyUI
#   2. Activates the 'comfyui' conda environment
#   3. Launches ComfyUI with the following settings:
#      - Listens on all network interfaces (0.0.0.0)
#      - Uses 'taesd' as the preview method
#      - Enables PyTorch cross-attention
#      - Disables xformers
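#      - (Currently disabled) Uses a Comfy-Org/ComfyUI_frontend build selected with
#        --front-end-version; that option is commented out in the function body
#      - Fast mode is not enabled at the moment: no option for it is passed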
#
# Parameters:
#   None
#
# Notes:
#   - Requires ComfyUI to be installed in ~/ComfyUI
#   - Requires a conda environment named 'comfyui' with necessary dependencies
#   - The --listen 0.0.0.0 option allows access from other devices on the network
#   - --preview-method taesd provides better previews
#   - --use-pytorch-cross-attention and --disable-xformers affect performance and compatibility
#   - --front-end-version (currently commented out) selects which front-end build is used
c() {
    # Every step depends on the previous one: ComfyUI is only started when the
    # directory change and the environment activation both succeeded. Otherwise
    # `python main.py` would be run from whatever directory and environment
    # happen to be active.
    # Note: the directory change persists in the calling shell after ComfyUI exits.
    cd ~/ComfyUI &&
        conda activate comfyui &&
        python main.py --listen 0.0.0.0 --preview-method taesd --use-pytorch-cross-attention --disable-xformers
    # --front-end-version /home/kade/source/repos/ComfyUI_frontend/dist 
}
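# Function: conda_prompt_info
# Description:
#   Prints the name of the active Conda environment, enclosed in parentheses.
#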
# Usage:
#   conda_prompt_info
#
# Returns:
#   A string containing the name of the active Conda environment, enclosed in parentheses.
#   If no Conda environment is active, it returns an empty string.
#
# Details:
#   1. Checks if the CONDA_DEFAULT_ENV environment variable is set and non-empty.
#   2. If CONDA_DEFAULT_ENV is set, it echoes the environment name in parentheses.
#   3. If CONDA_DEFAULT_ENV is not set or empty, the function returns silently.
#
# Example output:
#   If CONDA_DEFAULT_ENV is set to "myenv", the function will output: (myenv)
#
# Notes:
#   - This function is typically used in command prompts or shell scripts to
#     visually indicate the active Conda environment to the user.
#   - It can be incorporated into PS1 or other prompt variables to automatically
#     display the Conda environment in the shell prompt.
conda_prompt_info() {
  # Snapshot the environment name Conda exports for the active environment.
  local active_env="$CONDA_DEFAULT_ENV"

  # Nothing active: stay silent and report success.
  [[ -z "$active_env" ]] && return 0

  echo "(${active_env})"
}

# Setup zoxide: run `zoxide init zsh` and evaluate the shell code it prints in
# the current shell.
# NOTE(review): this assumes `zoxide` is on PATH at this point; if it is not, the
# command substitution fails and nothing is set up — confirm on new machines.
eval "$(zoxide init zsh)"

# Function: display_custom_help
# Description:
#   This function displays custom help information for user-defined functions and aliases.
#   It provides a quick reference for commonly used commands and their descriptions.
#
# Usage:
#   display_custom_help
#
# Output:
#   Prints a formatted list of custom commands and their brief descriptions.
#
# Note:
#   Add or modify entries in this function to keep your personal command reference up-to-date.
#display_custom_help