diff --git a/epoch100/adapter_config.json b/epoch100/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6
--- /dev/null
+++ b/epoch100/adapter_config.json
@@ -0,0 +1,43 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "img_attn_qkv",
+    "txt_attn_qkv",
+    "img_mod.linear",
+    "img_mlp.fc1",
+    "txt_mlp.fc1",
+    "linear1",
+    "linear2",
+    "img_mlp.fc2",
+    "img_attn_proj",
+    "txt_mod.linear",
+    "txt_mlp.fc2",
+    "modulation.linear",
+    "txt_attn_proj"
+  ],
+  "task_type": null,
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/epoch100/adapter_model.safetensors b/epoch100/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..edbd941b9e43d54e2c9abe25334a4208e2e22cd9
--- /dev/null
+++ b/epoch100/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc63f3ccf550bd7278935a5384d18abf9c75878fd8f26b6484a27a563b62199c
+size 322519480
diff --git a/epoch100/hunyuan_config.toml b/epoch100/hunyuan_config.toml
new file mode 100644
index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45
--- /dev/null
+++ b/epoch100/hunyuan_config.toml
@@ -0,0 +1,94 @@
+# Output path for training runs. Each training run makes a new directory in here.
+output_dir = '/notebooks/diffusion-pipe/output'
+
+# Dataset config file.
+dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml'
+# You can have separate eval datasets. Give them a name for Tensorboard metrics.
+# eval_datasets = [
+#     {name = 'something', config = 'path/to/eval_dataset.toml'},
+# ]
+
+# training settings
+
+# I usually set this to a really high value because I don't know how long I want to train.
+epochs = 1000
+# Batch size of a single forward/backward pass for one GPU.
+micro_batch_size_per_gpu = 1
+# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs.
+pipeline_stages = 1
+# Number of micro-batches sent through the pipeline for each training step.
+# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation).
+gradient_accumulation_steps = 4
+# Grad norm clipping.
+gradient_clipping = 1.0
+# Learning rate warmup.
+warmup_steps = 100
+
+# eval settings
+
+eval_every_n_epochs = 1
+eval_before_first_step = true
+# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set).
+# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means
+# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter.
+eval_micro_batch_size_per_gpu = 1
+eval_gradient_accumulation_steps = 1
+
+# misc settings
+
+# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models.
+save_every_n_epochs = 4
+# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag.
+#checkpoint_every_n_epochs = 1
+checkpoint_every_n_minutes = 120
+# Always set to true unless you have a huge amount of VRAM.
+activation_checkpointing = true
+# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this.
+partition_method = 'parameters'
+# dtype for saving the LoRA or model, if different from training dtype
+save_dtype = 'bfloat16'
+# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory.
+caching_batch_size = 1
+# How often deepspeed logs to console.
+steps_per_print = 1
+# How to extract video clips for training from a single input video file.
+# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right
+# number of frames for that bucket.
+# single_beginning: one clip starting at the beginning of the video
+# single_middle: one clip from the middle of the video (cutting off the start and end equally)
+# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some.
+# default is single_middle
+video_clip_mode = 'single_middle'
+
+[model]
+type = 'hunyuan-video'
+# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts.
+#ckpt_path = '/home/anon/HunyuanVideo/ckpts'
+# Or you can load it by pointing to all the ComfyUI files.
+transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors'
+vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors'
+llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/'
+clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/'
+# Base dtype used for all models.
+dtype = 'bfloat16'
+# Hunyuan Video supports fp8 for the transformer when training LoRA.
+transformer_dtype = 'float8'
+# How to sample timesteps to train on. Can be logit_normal or uniform.
+timestep_sample_method = 'logit_normal'
+
+[adapter]
+type = 'lora'
+rank = 32
+# Dtype for the LoRA weights you are training.
+dtype = 'bfloat16'
+# You can initialize the lora weights from a previously trained lora.
+#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50'
+
+[optimizer]
+# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights.
+# Look at train.py for other options. You could also easily edit the file and add your own.
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch104/adapter_config.json b/epoch104/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch104/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch104/adapter_model.safetensors b/epoch104/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..903082959d44d4a81a20445babdf54af62d4755b --- /dev/null +++ b/epoch104/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d1fca1401292c3c36b2c366aa833e234b0d435b347bdeb637cd848b27b208d6 +size 322519480 diff --git a/epoch104/hunyuan_config.toml b/epoch104/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch104/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch108/adapter_config.json b/epoch108/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch108/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch108/adapter_model.safetensors b/epoch108/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f829ed48b3c763698c5270130c67b72a066b59a1 --- /dev/null +++ b/epoch108/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b70c49fa1225af28314e41f4f389993235b562fd2647652d15bce9ab43514505 +size 322519480 diff --git a/epoch108/hunyuan_config.toml b/epoch108/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch108/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch112/adapter_config.json b/epoch112/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch112/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch112/adapter_model.safetensors b/epoch112/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13740b43c18914e0f35b9fc6d1bbf2f8633822cb --- /dev/null +++ b/epoch112/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63faddadf9540dbf2275fd2e93e6301ff888e5885f040f8dc069027a8619c40d +size 322519480 diff --git a/epoch112/hunyuan_config.toml b/epoch112/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch112/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch116/adapter_config.json b/epoch116/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch116/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch116/adapter_model.safetensors b/epoch116/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76e0852c9e204eff0d5e63a56bf476950f3ea761 --- /dev/null +++ b/epoch116/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dc983ff8f2245aa4af5d02d450e55b60aa03c7b28270751da68d6c389fb8083 +size 322519480 diff --git a/epoch116/hunyuan_config.toml b/epoch116/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch116/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch12/adapter_config.json b/epoch12/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch12/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch12/adapter_model.safetensors b/epoch12/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64cca0f85b7bc9c0a4869e46af95d7fb654a2177 --- /dev/null +++ b/epoch12/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78414cb47b3f6b71de54d64fc75ce24891411fb626d7ac4a41efac7e38abe562 +size 322519480 diff --git a/epoch12/hunyuan_config.toml b/epoch12/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch12/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch120/adapter_config.json b/epoch120/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch120/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch120/adapter_model.safetensors b/epoch120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3123a1e28ca48d4548eb2d2e279763ccbe2694b9 --- /dev/null +++ b/epoch120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc6d23cdbdfacbd863d29466011df66521a910715acb1247d0cc54e4d6941d8 +size 322519480 diff --git a/epoch120/hunyuan_config.toml b/epoch120/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch120/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch124/adapter_config.json b/epoch124/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch124/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch124/adapter_model.safetensors b/epoch124/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a81740f51d794d3beee6cd03dc6fe9ac2ced296 --- /dev/null +++ b/epoch124/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f997957e09710dfad00dc23e8592f5ed1295b40e347b211ddd2dafab38beb181 +size 322519480 diff --git a/epoch124/hunyuan_config.toml b/epoch124/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch124/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch128/adapter_config.json b/epoch128/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch128/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch128/adapter_model.safetensors b/epoch128/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64f49bc0c6d69a9efe26fd8901fdde67d3ce2810 --- /dev/null +++ b/epoch128/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6234471dcb2faaa15261c88165de238cb9752c0252d0bd000cdb6114f3ffdb60 +size 322519480 diff --git a/epoch128/hunyuan_config.toml b/epoch128/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch128/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch132/adapter_config.json b/epoch132/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch132/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch132/adapter_model.safetensors b/epoch132/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc0f80f014b789ab54aa6ce4addc3e88a93a1ea3 --- /dev/null +++ b/epoch132/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4edd799fb2d4b5b838f9172fd2ebe90fd9f7f169da2701311c929cb0d555964 +size 322519480 diff --git a/epoch132/hunyuan_config.toml b/epoch132/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch132/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch136/adapter_config.json b/epoch136/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch136/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch136/adapter_model.safetensors b/epoch136/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50ee95cf3c28fcbafa6c1821ef2b219ecc623351 --- /dev/null +++ b/epoch136/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af3a6f999acfa39b46058c8e91a23cfa0c2c904b080637c21309dcce3367e3da +size 322519480 diff --git a/epoch136/hunyuan_config.toml b/epoch136/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch136/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch140/adapter_config.json b/epoch140/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch140/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch140/adapter_model.safetensors b/epoch140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71c2f641ad9b9f99687e38db49191fc1869fa831 --- /dev/null +++ b/epoch140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61274788213d842ddb1b9082bb941378a1c66aa3f7fe654427d0eda3ab547913 +size 322519480 diff --git a/epoch140/hunyuan_config.toml b/epoch140/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch140/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch144/adapter_config.json b/epoch144/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch144/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch144/adapter_model.safetensors b/epoch144/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ba146f2b52bc206f2681c6be1132e86fffef868 --- /dev/null +++ b/epoch144/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275674607d3a829c159cbec32e0b887f85c687145618f46dbea90e290328bfcf +size 322519480 diff --git a/epoch144/hunyuan_config.toml b/epoch144/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch144/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch148/adapter_config.json b/epoch148/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch148/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch148/adapter_model.safetensors b/epoch148/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4ba0e061146534636e3d312cac29a5aaa15ccb6 --- /dev/null +++ b/epoch148/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cfc441c42531f2697fea72f0356dea7b451e4b1c7bda12f19b308f3e520a018 +size 322519480 diff --git a/epoch148/hunyuan_config.toml b/epoch148/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch148/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
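Each adapter_model.safetensors above is a Git LFS pointer to a ~322 MB file of LoRA weights. A minimal sketch of inspecting one checkpoint locally, assuming the LFS objects have been pulled and that the safetensors and torch packages are installed (the epoch148 path is just an example):

# Minimal sketch: list a few of the LoRA tensors stored in one epoch checkpoint.
# Assumes `git lfs pull` has materialized the file referenced by the pointer.
from safetensors.torch import load_file

state_dict = load_file("epoch148/adapter_model.safetensors")  # example path
print(f"{len(state_dict)} tensors")
for name, tensor in sorted(state_dict.items())[:5]:
    print(name, tuple(tensor.shape), tensor.dtype)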
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch152/adapter_config.json b/epoch152/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch152/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch152/adapter_model.safetensors b/epoch152/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c986276c24a1e0d7b53fbf6802e724938bee35c --- /dev/null +++ b/epoch152/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67215223c3ea863d6406fb8533902e246950dd78e971d400386e448e6309aee3 +size 322519480 diff --git a/epoch152/hunyuan_config.toml b/epoch152/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch152/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch156/adapter_config.json b/epoch156/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch156/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch156/adapter_model.safetensors b/epoch156/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78d31768054ab4560fc34c3da867d401edac8abd --- /dev/null +++ b/epoch156/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a322958a74f9cda35f6032de6d6d1931d810d86adb5246e69f2cb2af26b7981 +size 322519480 diff --git a/epoch156/hunyuan_config.toml b/epoch156/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch156/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch16/adapter_config.json b/epoch16/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch16/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch16/adapter_model.safetensors b/epoch16/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9137553b83c1563c106797de18334959a9c5a77 --- /dev/null +++ b/epoch16/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd025bd0d222b9f859ff18d78197b11e43021ac0ead66bd694d4e620a7adad7c +size 322519480 diff --git a/epoch16/hunyuan_config.toml b/epoch16/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch16/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
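The model section sets timestep_sample_method = 'logit_normal'. In general terms, logit-normal sampling draws a standard normal variable and squashes it through a sigmoid, which concentrates training timesteps toward the middle of the schedule. A minimal, generic sketch of the idea (the mean and std values are illustrative defaults, not parameters read from this config, and diffusion-pipe's exact implementation may differ):

# Minimal sketch of logit-normal timestep sampling: sigmoid of a Gaussian draw.
# mean/std are hypothetical defaults chosen for illustration.
import torch

def sample_timesteps(batch_size: int, mean: float = 0.0, std: float = 1.0) -> torch.Tensor:
    return torch.sigmoid(torch.randn(batch_size) * std + mean)

print(sample_timesteps(4))  # four values in (0, 1), clustered around 0.5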
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch160/adapter_config.json b/epoch160/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch160/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch160/adapter_model.safetensors b/epoch160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e701ad24efa58bba05f53a9869643b53e9a451e8 --- /dev/null +++ b/epoch160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fbf4eabf2aacf44d3a775fc60830dfe12bb1c5ee1b48df5bc7d6ff3b53e8f61 +size 322519480 diff --git a/epoch160/hunyuan_config.toml b/epoch160/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch160/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch164/adapter_config.json b/epoch164/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch164/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch164/adapter_model.safetensors b/epoch164/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a984dc495d9ea2495520c784940dcca21b5bc4c8 --- /dev/null +++ b/epoch164/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90095b9c34ecf76fc48468bc1c20de9b2b3156a99d05758ea89183cb79810a30 +size 322519480 diff --git a/epoch164/hunyuan_config.toml b/epoch164/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch164/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch168/adapter_config.json b/epoch168/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch168/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch168/adapter_model.safetensors b/epoch168/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2399a5d71ad2fdb17dccbcbded70049bd88f3174 --- /dev/null +++ b/epoch168/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc79126e3d5739f748135d9a9c18fe7b98920a008d346e04a892110690262db4 +size 322519480 diff --git a/epoch168/hunyuan_config.toml b/epoch168/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch168/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch172/adapter_config.json b/epoch172/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch172/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch172/adapter_model.safetensors b/epoch172/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23b6d819fe290026d7011741b5a402340c835428 --- /dev/null +++ b/epoch172/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1273dee4637a01587b7dd336bc90c46ddbb41a4dc81ec084cb2d9c4a75c1ee15 +size 322519480 diff --git a/epoch172/hunyuan_config.toml b/epoch172/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch172/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch176/adapter_config.json b/epoch176/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch176/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch176/adapter_model.safetensors b/epoch176/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2bcce2224d0b5096e8be9b3a7fb87034b16de0c --- /dev/null +++ b/epoch176/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54637a3649fd489406fe8fe8ffc3b3ea23fa0bce2be69ae47b8bab753179dc9a +size 322519480 diff --git a/epoch176/hunyuan_config.toml b/epoch176/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch176/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch20/adapter_config.json b/epoch20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch20/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch20/adapter_model.safetensors b/epoch20/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca9d5495aaa5b6653be59425af59f5c0bb6b9d6a --- /dev/null +++ b/epoch20/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e513d879a465f728b9269a16a0a64ee3a34ac39f0f43fd95283f707c3c835af6 +size 322519480 diff --git a/epoch20/hunyuan_config.toml b/epoch20/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch20/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch24/adapter_config.json b/epoch24/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch24/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch24/adapter_model.safetensors b/epoch24/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d73f35ba251702555997d41e9ddddb446789e8a4 --- /dev/null +++ b/epoch24/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a50d9f908154967afbbbc6f680aa7fc65230971b346935a38205b01ebdfa817 +size 322519480 diff --git a/epoch24/hunyuan_config.toml b/epoch24/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch24/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch28/adapter_config.json b/epoch28/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch28/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch28/adapter_model.safetensors b/epoch28/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c8b782f213e49828dcc75a55729deeb881b48cc --- /dev/null +++ b/epoch28/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ed9c14e4ed331d727cf882a7b06d88c6c98583ff80764cb84e32f507f5b2b1 +size 322519480 diff --git a/epoch28/hunyuan_config.toml b/epoch28/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch28/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch32/adapter_config.json b/epoch32/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch32/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch32/adapter_model.safetensors b/epoch32/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7839de4bd781d7fc7c79d3bce78879f892b9412 --- /dev/null +++ b/epoch32/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:611930063e46c6897f5371de4cbcd6d4c52464ed0aded614ca211fc9855dcfbb +size 322519480 diff --git a/epoch32/hunyuan_config.toml b/epoch32/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch32/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch36/adapter_config.json b/epoch36/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch36/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch36/adapter_model.safetensors b/epoch36/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03f09e286a30f5e156b5a7ff4b257b0db9e89dc7 --- /dev/null +++ b/epoch36/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7daec57d8af33427d2e4f530153796a8c38c32af5262f23bdd1674a698b08400 +size 322519480 diff --git a/epoch36/hunyuan_config.toml b/epoch36/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch36/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch4/adapter_config.json b/epoch4/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch4/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch4/adapter_model.safetensors b/epoch4/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0099e54c28df971a894823cbe85271484720ccb --- /dev/null +++ b/epoch4/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584b0b7c12bdf2bdae91ace2b9d488444064603096696b9d225806e09f9621a9 +size 322519480 diff --git a/epoch4/hunyuan_config.toml b/epoch4/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch4/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch40/adapter_config.json b/epoch40/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch40/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch40/adapter_model.safetensors b/epoch40/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ab459d6f7881bf5d72f79849a898c65fca4a790 --- /dev/null +++ b/epoch40/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ba302c08cb118fabb29fd398adbb16297343672f5ef68f17e062d2340f66554 +size 322519480 diff --git a/epoch40/hunyuan_config.toml b/epoch40/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch40/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch44/adapter_config.json b/epoch44/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch44/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch44/adapter_model.safetensors b/epoch44/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a16600ab1da74f8ecf2509416c32e57322da9776 --- /dev/null +++ b/epoch44/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8737c71d64fda1fdaf3b20b4b344db3d745489e48f688e8c220f15553c6cca3f +size 322519480 diff --git a/epoch44/hunyuan_config.toml b/epoch44/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch44/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch48/adapter_config.json b/epoch48/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch48/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch48/adapter_model.safetensors b/epoch48/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7245ad3ff730197aeca97f4dd9296dc2074707fd --- /dev/null +++ b/epoch48/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31b890c8147d8773d35e28f7d0fb765ad2c01175138d9978e53854e12a7910c7 +size 322519480 diff --git a/epoch48/hunyuan_config.toml b/epoch48/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch48/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch52/adapter_config.json b/epoch52/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch52/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch52/adapter_model.safetensors b/epoch52/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20f87010e88ef96c70064ae22e0cfe534cccc2a9 --- /dev/null +++ b/epoch52/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741d5926f4b3813de58d4145cdc76c83b1a1b2ef166a7f2239f0df157b0c16bb +size 322519480 diff --git a/epoch52/hunyuan_config.toml b/epoch52/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch52/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch56/adapter_config.json b/epoch56/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch56/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch56/adapter_model.safetensors b/epoch56/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43e2b286cd1ba490a4d062f405ee5b8227e69295 --- /dev/null +++ b/epoch56/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e1351cfcce777cb51f37b4d492c9788a41eeeb16b71f70bc63782285030c99 +size 322519480 diff --git a/epoch56/hunyuan_config.toml b/epoch56/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch56/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch60/adapter_config.json b/epoch60/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch60/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch60/adapter_model.safetensors b/epoch60/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e456925086f0c1d355adabbfb393f45ed2e6887 --- /dev/null +++ b/epoch60/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaae412fb25caf88e1331fd369d5e47b1ba484126d8c7bbe9c5a7a4e5501d133 +size 322519480 diff --git a/epoch60/hunyuan_config.toml b/epoch60/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch60/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch64/adapter_config.json b/epoch64/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch64/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch64/adapter_model.safetensors b/epoch64/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ee7e0cb4994e98c5e5d6b345d1407beb1297ffc --- /dev/null +++ b/epoch64/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c3584ab4ce0bca44f56d578cb784071c35847305b80ea20ebdd223cc72713f7 +size 322519480 diff --git a/epoch64/hunyuan_config.toml b/epoch64/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch64/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch68/adapter_config.json b/epoch68/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch68/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch68/adapter_model.safetensors b/epoch68/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82d06b2b31069eaf9f971985c54e95352a8767bd --- /dev/null +++ b/epoch68/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be6b4aac5a0abc10c92497edbf6092832aef57abcf1671cec969d20132d2273f +size 322519480 diff --git a/epoch68/hunyuan_config.toml b/epoch68/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch68/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch72/adapter_config.json b/epoch72/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch72/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch72/adapter_model.safetensors b/epoch72/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08356dac6af19b1eb721a8b68d3396bcf544fe21 --- /dev/null +++ b/epoch72/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b94c3ec8a590f503b7a1f9eb514447b6c085c064bef9f4ef166525542d0c182 +size 322519480 diff --git a/epoch72/hunyuan_config.toml b/epoch72/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch72/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch76/adapter_config.json b/epoch76/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch76/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch76/adapter_model.safetensors b/epoch76/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ec8877c779eb85f1f97a7089e651b0bc712cfda --- /dev/null +++ b/epoch76/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3beb3daff8cd12ccb1e5016f64a466c0d4c0aecad5c94fb266a0cca74ad52ee +size 322519480 diff --git a/epoch76/hunyuan_config.toml b/epoch76/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch76/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch8/adapter_config.json b/epoch8/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch8/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch8/adapter_model.safetensors b/epoch8/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69879930a750478881a1de522b7e37fb5c71f027 --- /dev/null +++ b/epoch8/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82185a0cd6ecb09dfebb5522edea733e4cc0804d68dc67dacccd8c18d77a09e5 +size 322519480 diff --git a/epoch8/hunyuan_config.toml b/epoch8/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch8/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch80/adapter_config.json b/epoch80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch80/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch80/adapter_model.safetensors b/epoch80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef814d84f5544ad7871587d673163c166d6b7851 --- /dev/null +++ b/epoch80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98f3a7010f01525e4bf2685e4707c40fd319c0477ac57b1bd7c3dca9ade74c9f +size 322519480 diff --git a/epoch80/hunyuan_config.toml b/epoch80/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch80/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch84/adapter_config.json b/epoch84/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch84/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch84/adapter_model.safetensors b/epoch84/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f25343d9cca72dd48b07c02f3e44dad5079ed38 --- /dev/null +++ b/epoch84/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18eed91fa10d938db6b09391a3508531c1b098d030aaef930618611e25c51e33 +size 322519480 diff --git a/epoch84/hunyuan_config.toml b/epoch84/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch84/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch88/adapter_config.json b/epoch88/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch88/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch88/adapter_model.safetensors b/epoch88/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd544885c3f05200c0ff56e0eee3d5d6ea278616 --- /dev/null +++ b/epoch88/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd2b97d18f4c292ca7b6e9ab591db9bcaf84fbd0822737a7e1b0ca4451a254d +size 322519480 diff --git a/epoch88/hunyuan_config.toml b/epoch88/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch88/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch92/adapter_config.json b/epoch92/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch92/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch92/adapter_model.safetensors b/epoch92/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abf26d3908fc69a9f2e09fdc92145e0f9ce68ef8 --- /dev/null +++ b/epoch92/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb196a9bf647667185df26b4f1ee863455296753dd4687047202b41e5e8ab199 +size 322519480 diff --git a/epoch92/hunyuan_config.toml b/epoch92/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch92/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/epoch96/adapter_config.json b/epoch96/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..61204e3af1a06872d01197841629541a35dc0bb6 --- /dev/null +++ b/epoch96/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "img_attn_qkv", + "txt_attn_qkv", + "img_mod.linear", + "img_mlp.fc1", + "txt_mlp.fc1", + "linear1", + "linear2", + "img_mlp.fc2", + "img_attn_proj", + "txt_mod.linear", + "txt_mlp.fc2", + "modulation.linear", + "txt_attn_proj" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/epoch96/adapter_model.safetensors b/epoch96/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c19b6fc59dd064023a3971c0e0c91e2eb134920f --- /dev/null +++ b/epoch96/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69bc03867d7861ea3ab9ac99a1cf43aad7b11dfb2750b83ab5601338d5749407 +size 322519480 diff --git a/epoch96/hunyuan_config.toml b/epoch96/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/epoch96/hunyuan_config.toml @@ -0,0 +1,94 @@ +# Output path for training runs. Each training run makes a new directory in here. +output_dir = '/notebooks/diffusion-pipe/output' + +# Dataset config file. +dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml' +# You can have separate eval datasets. Give them a name for Tensorboard metrics. +# eval_datasets = [ +# {name = 'something', config = 'path/to/eval_dataset.toml'}, +# ] + +# training settings + +# I usually set this to a really high value because I don't know how long I want to train. +epochs = 1000 +# Batch size of a single forward/backward pass for one GPU. +micro_batch_size_per_gpu = 1 +# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs. +pipeline_stages = 1 +# Number of micro-batches sent through the pipeline for each training step. +# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation). +gradient_accumulation_steps = 4 +# Grad norm clipping. +gradient_clipping = 1.0 +# Learning rate warmup. +warmup_steps = 100 + +# eval settings + +eval_every_n_epochs = 1 +eval_before_first_step = true +# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set). +# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means +# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter. 
+eval_micro_batch_size_per_gpu = 1 +eval_gradient_accumulation_steps = 1 + +# misc settings + +# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models. +save_every_n_epochs = 4 +# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag. +#checkpoint_every_n_epochs = 1 +checkpoint_every_n_minutes = 120 +# Always set to true unless you have a huge amount of VRAM. +activation_checkpointing = true +# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this. +partition_method = 'parameters' +# dtype for saving the LoRA or model, if different from training dtype +save_dtype = 'bfloat16' +# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory. +caching_batch_size = 1 +# How often deepspeed logs to console. +steps_per_print = 1 +# How to extract video clips for training from a single input video file. +# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right +# number of frames for that bucket. +# single_beginning: one clip starting at the beginning of the video +# single_middle: one clip from the middle of the video (cutting off the start and end equally) +# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some. +# default is single_middle +video_clip_mode = 'single_middle' + +[model] +type = 'hunyuan-video' +# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts. +#ckpt_path = '/home/anon/HunyuanVideo/ckpts' +# Or you can load it by pointing to all the ComfyUI files. +transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors' +vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors' +llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/' +clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/' +# Base dtype used for all models. +dtype = 'bfloat16' +# Hunyuan Video supports fp8 for the transformer when training LoRA. +transformer_dtype = 'float8' +# How to sample timesteps to train on. Can be logit_normal or uniform. +timestep_sample_method = 'logit_normal' + +[adapter] +type = 'lora' +rank = 32 +# Dtype for the LoRA weights you are training. +dtype = 'bfloat16' +# You can initialize the lora weights from a previously trained lora. +#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50' + +[optimizer] +# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights. +# Look at train.py for other options. You could also easily edit the file and add your own. 
+type = 'adamw_optimi' +lr = 2e-5 +betas = [0.9, 0.99] +weight_decay = 0.01 +eps = 1e-8 \ No newline at end of file diff --git a/events.out.tfevents.1738058959.nmx7i01b1d.2136.0 b/events.out.tfevents.1738058959.nmx7i01b1d.2136.0 new file mode 100644 index 0000000000000000000000000000000000000000..f8f8a37d498c219cf49506f0ff045c5b855dcd65 --- /dev/null +++ b/events.out.tfevents.1738058959.nmx7i01b1d.2136.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a37bf5917438c650554e0437b7b2253d7ee9044e8d5c45c813738abf5e1e312 +size 114631 diff --git a/global_step1597/layer_00-model_states.pt b/global_step1597/layer_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..42aca0cbb86108d21d40999b36c31f9b86048523 --- /dev/null +++ b/global_step1597/layer_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c682c9cc8e731c37498845f3a980635b04094468e55e77553c7a12ccff998f5 +size 920 diff --git a/global_step1597/layer_01-model_states.pt b/global_step1597/layer_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..80809b3b413071b6dee1902bdcc88ef595011e5e --- /dev/null +++ b/global_step1597/layer_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20f7f86faed8510d14f404cc066eb590d25f393d29286e73c2a2dbf7f55a9b93 +size 9051114 diff --git a/global_step1597/layer_02-model_states.pt b/global_step1597/layer_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..108a2595100e8fa37e45ad86d0a517a175f73e0d --- /dev/null +++ b/global_step1597/layer_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:226a6f8b71410098f2adb086319da10c101576df0bce5c68665b46ce398fd1f4 +size 9051114 diff --git a/global_step1597/layer_03-model_states.pt b/global_step1597/layer_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b78e1cb8094833b839c8405be1024a81f5da473b --- /dev/null +++ b/global_step1597/layer_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0900abd40a399e1fbd8c43a6bd2421538bb3a2dc6cc2838169af9607c922602a +size 9051114 diff --git a/global_step1597/layer_04-model_states.pt b/global_step1597/layer_04-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f51ae5257ae20e89140e6c995f3f9612dade8dce --- /dev/null +++ b/global_step1597/layer_04-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d7ceb86780d205a3b902a794131c3ab43363269e19cd7b61c36a5f49903954 +size 9051114 diff --git a/global_step1597/layer_05-model_states.pt b/global_step1597/layer_05-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d57d7a6879a8f9d535bdc59826bf3cfcf81c15d8 --- /dev/null +++ b/global_step1597/layer_05-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cee5fb27c4a88ec4962d901899f3a04f14628b40f4809c1f8e72901b63bb808 +size 9051114 diff --git a/global_step1597/layer_06-model_states.pt b/global_step1597/layer_06-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6722b7be7d0e75a1a377824eddbd402e8eb4c5c1 --- /dev/null +++ b/global_step1597/layer_06-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31ace2cef2bd059a12f0adce797b3a64fa500a1ab299985394a07074fa4f6031 +size 9051114 diff --git a/global_step1597/layer_07-model_states.pt 
b/global_step1597/layer_07-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..451bf2ed91354cdf2e027700af2e1d4aef480803 --- /dev/null +++ b/global_step1597/layer_07-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1bc7f0555e56dfde82e07ced25227a859ba3169b966f77053ca3f4f2d07e8af +size 9051114 diff --git a/global_step1597/layer_08-model_states.pt b/global_step1597/layer_08-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..112fda31728e4b3b10f862a27b6552ce445ce6fa --- /dev/null +++ b/global_step1597/layer_08-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b985ac0066d746fbf30e3f9d18b1bdd3f3dc3fe23a8110ad585485a0d3c4810b +size 9051114 diff --git a/global_step1597/layer_09-model_states.pt b/global_step1597/layer_09-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f563cccea26399036322d273f1d01a9a523cd3a9 --- /dev/null +++ b/global_step1597/layer_09-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91ca93aaad37f5b87e1da6e3f1e029cfa3af398a755aa1c51c657501d44a8a58 +size 9051114 diff --git a/global_step1597/layer_10-model_states.pt b/global_step1597/layer_10-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6431263e2e5484467648039251fd48a64de0000d --- /dev/null +++ b/global_step1597/layer_10-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92672ebae90c8b1c0ff82651f7b69db696e4e01b45f3df54859c1cc5d22f241c +size 9051114 diff --git a/global_step1597/layer_11-model_states.pt b/global_step1597/layer_11-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5804effbff4ac51a905309529c1889f8664842c --- /dev/null +++ b/global_step1597/layer_11-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0620b43949004e704afc8f73592076465ec222fa9927d3c4264212a21d3d0572 +size 9051114 diff --git a/global_step1597/layer_12-model_states.pt b/global_step1597/layer_12-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..479dd50eb0bd5ced0d9d39d2ba7df96ca40dcf97 --- /dev/null +++ b/global_step1597/layer_12-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15d067ec678604e512f900c68751dd723ef2377a971ab64301e3614407d04017 +size 9051114 diff --git a/global_step1597/layer_13-model_states.pt b/global_step1597/layer_13-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9762f47a303f228c6de721e37634cabf16bc29a9 --- /dev/null +++ b/global_step1597/layer_13-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a7412ed145671ac391959273ccfe6d7d0d77a65abc57918191cc51c3189b69e +size 9051114 diff --git a/global_step1597/layer_14-model_states.pt b/global_step1597/layer_14-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..088ac54a76dbba7f942bbd32aa7fbe967fd6db24 --- /dev/null +++ b/global_step1597/layer_14-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c7de3a833e6e9bb76bd9ed041c359cdec06815d96c83c042886b192cba2efba +size 9051114 diff --git a/global_step1597/layer_15-model_states.pt b/global_step1597/layer_15-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d72eeeeb32cce841eab543bfa1091cb007818872 --- /dev/null +++ 
b/global_step1597/layer_15-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45bc126fddb3eabc7113a11570c8aea8f4d057ae88ec3a7bf35d106aa2889161 +size 9051114 diff --git a/global_step1597/layer_16-model_states.pt b/global_step1597/layer_16-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..df7f0073c4d85b993a027919fad636d27d72d6a8 --- /dev/null +++ b/global_step1597/layer_16-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc5ee56105823a9d4a3e316a9a7002f0fb59fd414e11a15d14e3fb9a74a1dd3e +size 9051114 diff --git a/global_step1597/layer_17-model_states.pt b/global_step1597/layer_17-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d145dde758f2b24b7c0032a0972fa136f2ad0f3 --- /dev/null +++ b/global_step1597/layer_17-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16af0cfb81f2f83a883302599d9db96fedaa78dbd516443e6b634e90fea575fe +size 9051114 diff --git a/global_step1597/layer_18-model_states.pt b/global_step1597/layer_18-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..100bd1cc634a9357c860c7fd4f7612e7c15678e8 --- /dev/null +++ b/global_step1597/layer_18-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5a9489cb02e79c5a476ea99f927b28d5ef5609729cc541dc60e14466fce0195 +size 9051114 diff --git a/global_step1597/layer_19-model_states.pt b/global_step1597/layer_19-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2ff454a851b743e43b192632e6300875b98abb7 --- /dev/null +++ b/global_step1597/layer_19-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4dde60f7aec155b76e04b7b19fe8b0e4d432962b02e9b0ef08bb28aceb2ac8b +size 9051114 diff --git a/global_step1597/layer_20-model_states.pt b/global_step1597/layer_20-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..673551970d6aa833894f7feab5becdae9a60701d --- /dev/null +++ b/global_step1597/layer_20-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5629d35abaf9d61512ab38077345cf95280be4315e53e508ae9660722bd24772 +size 9051114 diff --git a/global_step1597/layer_22-model_states.pt b/global_step1597/layer_22-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c0c11f939767c52168098b3a53b863c744af1d6 --- /dev/null +++ b/global_step1597/layer_22-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af0100366a6ff930a00e88b8ec7b5e3c4daa25f4be4931035fc2bfd0a82a1569 +size 3541780 diff --git a/global_step1597/layer_23-model_states.pt b/global_step1597/layer_23-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec2caa750436f9ccd9f47d957c4b718a923d415c --- /dev/null +++ b/global_step1597/layer_23-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caa02ac25fca8cf057e14c4a46421e1c4a1b1c818d2bbdd248ea635c614c4176 +size 3541780 diff --git a/global_step1597/layer_24-model_states.pt b/global_step1597/layer_24-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f595390a3e6ec455e784568e9e59d4439c889d74 --- /dev/null +++ b/global_step1597/layer_24-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c5e53650a652bac7f6b78849731e74db2cdd1462e9a2d69f04fdf43be787e26 +size 3541780 diff 
--git a/global_step1597/layer_25-model_states.pt b/global_step1597/layer_25-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0214239ff5eedf1b45265b1a9ce17d1c80eba774 --- /dev/null +++ b/global_step1597/layer_25-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eed4a1cebf883cfd55cfaa1a984239cf7843fcc54cf7c31d4a6d6a1ff65b570 +size 3541780 diff --git a/global_step1597/layer_26-model_states.pt b/global_step1597/layer_26-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9eb2c87aed691782d159c2f720effba170edc891 --- /dev/null +++ b/global_step1597/layer_26-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f5f95445a26e51c8d617342b022670174112e914e4626eaf6bc29f4cf9c8f7e +size 3541780 diff --git a/global_step1597/layer_27-model_states.pt b/global_step1597/layer_27-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dad92d853193a6aa4b09070b153d57f6065ec869 --- /dev/null +++ b/global_step1597/layer_27-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:363a678ed5d2e8a9776378ef552d9042d767d3c0af7731153054ffa8c56495e5 +size 3541780 diff --git a/global_step1597/layer_28-model_states.pt b/global_step1597/layer_28-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b25a771cb17bfa3f25dd6f57cb49d0507b6782e8 --- /dev/null +++ b/global_step1597/layer_28-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69478b57082a50fe54126dd594f8c92995ba4bf4d1fa7c698984ffaee142c83f +size 3541780 diff --git a/global_step1597/layer_29-model_states.pt b/global_step1597/layer_29-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e17f795144f4fb82cb4256c0470ca05a2713c28 --- /dev/null +++ b/global_step1597/layer_29-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca7bd7aee6f05508df1d22a9aca5c74c90f209fc749c1ef5ce937629a16360c +size 3541780 diff --git a/global_step1597/layer_30-model_states.pt b/global_step1597/layer_30-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8bf423b20462ef150aad23d35764da1c9f1a2ef --- /dev/null +++ b/global_step1597/layer_30-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa8f5f7bb09b1190f1eb3a13d497e317d85d571690073334094fd4e68be8d271 +size 3541780 diff --git a/global_step1597/layer_31-model_states.pt b/global_step1597/layer_31-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb2fc26440b0bcbefad7cd5ccc6938559606c42a --- /dev/null +++ b/global_step1597/layer_31-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d9ffe471757ea8c5e5201a3c1e01e29f6edbc8216b8b07477ba26f8d64c6d64 +size 3541780 diff --git a/global_step1597/layer_32-model_states.pt b/global_step1597/layer_32-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..649890b0ec2564eaff85a3fcce46dd91f7d19f51 --- /dev/null +++ b/global_step1597/layer_32-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843c5131055118c76b324baf1439e3ea592f46704a43ed99e9d5fb96f5cc67db +size 3541780 diff --git a/global_step1597/layer_33-model_states.pt b/global_step1597/layer_33-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..36f4588ce382e469bac59eeebd54ebaea2e0ab83 
--- /dev/null +++ b/global_step1597/layer_33-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cda8ba1f45cc239b1dd0dd8ce3b0f8ea0f5dcfba94ede064eee77782e2153309 +size 3541780 diff --git a/global_step1597/layer_34-model_states.pt b/global_step1597/layer_34-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8bb50d80f91980a07416f6fc8442962cd570e4e --- /dev/null +++ b/global_step1597/layer_34-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:015832f93d7d8f066d2e533b7fded831191271d8209b03c0c542cf64ea4e29ed +size 3541780 diff --git a/global_step1597/layer_35-model_states.pt b/global_step1597/layer_35-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1e7d5fe2d378f753dd958ef220e84b0f59b92ea --- /dev/null +++ b/global_step1597/layer_35-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45f84df53919c354492a86c2e8c44d49ecbb5419ddcf9a5d1cd3b75a2dd638b1 +size 3541780 diff --git a/global_step1597/layer_36-model_states.pt b/global_step1597/layer_36-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..100ff8c89511d6522cd4b0ef0f87460ea9c7b7cb --- /dev/null +++ b/global_step1597/layer_36-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f82dd0803acd1f1785c75f5e89e4927952acf47fd15fb3715b43cd49fde934 +size 3541780 diff --git a/global_step1597/layer_37-model_states.pt b/global_step1597/layer_37-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..203dfdcb1bb7c229e0945b0004b804e705078ccc --- /dev/null +++ b/global_step1597/layer_37-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e5d2b585fd4e57ef024964bf124c22323af7cbc833c3348b10462e0c4fc8387 +size 3541780 diff --git a/global_step1597/layer_38-model_states.pt b/global_step1597/layer_38-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..46e022ac8874c88b5cbe5d1866b79d142c62e6ef --- /dev/null +++ b/global_step1597/layer_38-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb3e9255725cbeed5591a2e121cc4acc0f18d1b6365628ef05e97b889ff3c901 +size 3541780 diff --git a/global_step1597/layer_39-model_states.pt b/global_step1597/layer_39-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..be742da5f072932f9351733ff70cb2cdab6a688b --- /dev/null +++ b/global_step1597/layer_39-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b034dabd34297381d5d603911ba8ede866d109fb9e7e72670a87fb5a59eca4de +size 3541780 diff --git a/global_step1597/layer_40-model_states.pt b/global_step1597/layer_40-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..05bee8778754ac036a7b3d85b85b6ed186f15be3 --- /dev/null +++ b/global_step1597/layer_40-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf1ae4d1e26260d28486ae9707ab3660f6c192e786c8b351247a3e709b94223 +size 3541780 diff --git a/global_step1597/layer_41-model_states.pt b/global_step1597/layer_41-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9d6bfcc273409bfd0e618b836cc283f2affd8df --- /dev/null +++ b/global_step1597/layer_41-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38902a198b8b05ca2c4d15ffe5536229581b42af6de9b6e8776f994e6a9ba85b 
+size 3541780 diff --git a/global_step1597/layer_42-model_states.pt b/global_step1597/layer_42-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ccb6b6ecbd53550f606a8b3dc0facc376d0e1aff --- /dev/null +++ b/global_step1597/layer_42-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e03f1fc53effd427f06e2609cb91da08de47dab67e928ec15cb9231067b281b4 +size 3541780 diff --git a/global_step1597/layer_43-model_states.pt b/global_step1597/layer_43-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2baa98f33e1d683f3b0d278a8b5cd01c63d9a3ea --- /dev/null +++ b/global_step1597/layer_43-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27b98f641545f4c9c8793b8031a2b14cfe4bb42bb2d9d064f4a08e21cfe1106 +size 3541780 diff --git a/global_step1597/layer_44-model_states.pt b/global_step1597/layer_44-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a20d4b1a1d3e56e64f82826ab14cf0400bb6214 --- /dev/null +++ b/global_step1597/layer_44-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:417d96398aa01c3574d778e86924df6d28e6440d584b97abd15aa553ccc7e3b5 +size 3541780 diff --git a/global_step1597/layer_45-model_states.pt b/global_step1597/layer_45-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..021e108a3d5cfd234a9148c5b6ac7241ff945532 --- /dev/null +++ b/global_step1597/layer_45-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09374456fd3e4d52108cb8bc22e69cc90f730614eb79acf1e6ba6d093d2e3020 +size 3541780 diff --git a/global_step1597/layer_46-model_states.pt b/global_step1597/layer_46-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fc6ee42ce76d9c1cbbcbfb4656a03c50157a015 --- /dev/null +++ b/global_step1597/layer_46-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b141398025013e6f57239785e273bcda9593a3ea05e288df829479c8f897dd6 +size 3541780 diff --git a/global_step1597/layer_47-model_states.pt b/global_step1597/layer_47-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd82a8ea041bb65e9d8b9b95bd2697df2438e259 --- /dev/null +++ b/global_step1597/layer_47-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3d5a64462a8f205f5ab6d62d1f7e2efbe2093d38919e1da2328c6e4c729e045 +size 3541780 diff --git a/global_step1597/layer_48-model_states.pt b/global_step1597/layer_48-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22e769cdf164cdc4b3599e8bea971fbfff2d1bfc --- /dev/null +++ b/global_step1597/layer_48-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9305e4d82d2682d3cd1014d543984ed65339897e1f0591d5649a25023b1a7c96 +size 3541780 diff --git a/global_step1597/layer_49-model_states.pt b/global_step1597/layer_49-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e505c8f9ccc078dab99fb20ec02fb38e02b5ee1 --- /dev/null +++ b/global_step1597/layer_49-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d31aaaba961de16ca57a8e3a90439438f5074282177101128196f423e9ed554a +size 3541780 diff --git a/global_step1597/layer_50-model_states.pt b/global_step1597/layer_50-model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..3bb551c28ec9a5a2e2e0ea7d2a34aa0a6e785560 --- /dev/null +++ b/global_step1597/layer_50-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6e001917a04921eed89282dd224341ebc6d2cbb9696a548a3ed446da17976b6 +size 3541780 diff --git a/global_step1597/layer_51-model_states.pt b/global_step1597/layer_51-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0295a3c6e0b39b4c165f5fda3a0475187af43079 --- /dev/null +++ b/global_step1597/layer_51-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51b1b79a44bd4d3d6da2e172628ab267f004b2dc21701612e7e614b07d8ea792 +size 3541780 diff --git a/global_step1597/layer_52-model_states.pt b/global_step1597/layer_52-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3cb7d29750bb3ffccfea110f10d70c26255f926 --- /dev/null +++ b/global_step1597/layer_52-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecf6c75061d3faf343d5d5752793f9a889a5fdd2c58d1344ce34b08d5f6ff244 +size 3541780 diff --git a/global_step1597/layer_53-model_states.pt b/global_step1597/layer_53-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e02f6fdcc04b623eec54109ba162e4565fd5336 --- /dev/null +++ b/global_step1597/layer_53-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e70c3de6b9b2d0e5c448e1bc59eae96e1378358f5958057ad1a5dd91245361c +size 3541780 diff --git a/global_step1597/layer_54-model_states.pt b/global_step1597/layer_54-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fb1501309d4d17773ccfa19304291a53c7c4057 --- /dev/null +++ b/global_step1597/layer_54-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9348f3c9d51f0b9497e8a731d0bdc0793d5fec2fb4ebe36915bfdbe648ed064 +size 3541780 diff --git a/global_step1597/layer_55-model_states.pt b/global_step1597/layer_55-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa9049227428fdb04496680f84ede8613b78475e --- /dev/null +++ b/global_step1597/layer_55-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff7eafb16ba32389d67fb80cb9e22e00048ff5ad523e2bae33e096fcd7bb650 +size 3541780 diff --git a/global_step1597/layer_56-model_states.pt b/global_step1597/layer_56-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0252060130df4e9c8ff4a01cea4f8659de12559 --- /dev/null +++ b/global_step1597/layer_56-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:157f838589e38be5e831617bb38fffd595d2320372700f8406cf62fb15628fd1 +size 3541780 diff --git a/global_step1597/layer_57-model_states.pt b/global_step1597/layer_57-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5cd0359f30548d82bc943f1b12098a27b9ed498 --- /dev/null +++ b/global_step1597/layer_57-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:224fa24470d4519e183d7848c463891fff372a91365a82f0527bcaeba5b32637 +size 3541780 diff --git a/global_step1597/layer_58-model_states.pt b/global_step1597/layer_58-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4603441f11f30900365931b8619aef44fc2cbb00 --- /dev/null +++ b/global_step1597/layer_58-model_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:afacbad38a0a7db4efb6271c07916728c396266ea575508ae99b5d73ef73681a +size 3541780 diff --git a/global_step1597/layer_59-model_states.pt b/global_step1597/layer_59-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7affc2540f3b8a598ee98a50eb0c41f1c8f77ef --- /dev/null +++ b/global_step1597/layer_59-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b83bedc72590cbbfdc4a18cf96d18906e23f3e9422fc25603e65d753662d3d02 +size 3541780 diff --git a/global_step1597/layer_60-model_states.pt b/global_step1597/layer_60-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4348f73ce04beadfc8e274409ab8640d2a22a9d --- /dev/null +++ b/global_step1597/layer_60-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:087e7f3c27a2e5cfd7a407e5a8e13fd13e340d5eb0fdd5bd1fe74344e0beab53 +size 3541780 diff --git a/global_step1597/layer_61-model_states.pt b/global_step1597/layer_61-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec933a2c8fedcc0e2ed395f0cf891fb14990764c --- /dev/null +++ b/global_step1597/layer_61-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92634f7d521a70ea04eb020b9d333211664c0e1e3be83d3f42ae05920f86ffdd +size 3541780 diff --git a/global_step1597/layer_62-model_states.pt b/global_step1597/layer_62-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..40aef7e44c4d6be7b419c67a0587c9a568c0168e --- /dev/null +++ b/global_step1597/layer_62-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:400f9de1bb694fb2ea95724840a27c2abe0d3e3d8432a1075b589745a33fed8f +size 920 diff --git a/global_step1597/mp_rank_00_model_states.pt b/global_step1597/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..48447ecd3befb4160809be1e66b67510cd946a98 --- /dev/null +++ b/global_step1597/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899e77d6c518e3053725b8761cec97e8b3a775be33c7661b9221d9b6452a573a +size 967904473 diff --git a/global_step800/layer_00-model_states.pt b/global_step800/layer_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..42aca0cbb86108d21d40999b36c31f9b86048523 --- /dev/null +++ b/global_step800/layer_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c682c9cc8e731c37498845f3a980635b04094468e55e77553c7a12ccff998f5 +size 920 diff --git a/global_step800/layer_01-model_states.pt b/global_step800/layer_01-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6f3f760d4ba7f854bf83fd93684b8954e21d041 --- /dev/null +++ b/global_step800/layer_01-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d635dea5401fc9c294eb7760ce0e08c49514e2b8f5e794f9e0a654b0b6937f16 +size 9051114 diff --git a/global_step800/layer_02-model_states.pt b/global_step800/layer_02-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d1d1b5e16a553579b7e4a6e88b6be7ded3947e4 --- /dev/null +++ b/global_step800/layer_02-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e0c594dbd5376d52e883ef2a8f36ae4af49f8c8784c9698daa7fee477868b2 +size 9051114 diff --git a/global_step800/layer_03-model_states.pt 
b/global_step800/layer_03-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..23bfd224443998c1a930ffcff2696a2af3aedecb --- /dev/null +++ b/global_step800/layer_03-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc1b7e19e3cab19a4c314d5906be93e2722b9a1cee79c9d65e4f0f6ede511805 +size 9051114 diff --git a/global_step800/layer_04-model_states.pt b/global_step800/layer_04-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1ebb84ebd832b1ff380da23ac404b7c8b9f6353 --- /dev/null +++ b/global_step800/layer_04-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:614f281b0afe36a498e69c439e330c21a82e1e2992e0544103bbbf32b7a9bd58 +size 9051114 diff --git a/global_step800/layer_05-model_states.pt b/global_step800/layer_05-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1b2e780d1dbc2489bb2236d0ddaa00c4a3c2591 --- /dev/null +++ b/global_step800/layer_05-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc957daeebdb0ab7be3a11f5a7184e066f9ceca009593fe2dc5334f46eb236f +size 9051114 diff --git a/global_step800/layer_06-model_states.pt b/global_step800/layer_06-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c0d4937c628346e1f9480245d3b23b5ea443686 --- /dev/null +++ b/global_step800/layer_06-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f38334170a3111ebe0d48cf2543e76561052f7fd25a6314461e9d324f38c054 +size 9051114 diff --git a/global_step800/layer_07-model_states.pt b/global_step800/layer_07-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c8386f4cf6a32dd2317f31df41d65c0a7a3a56b --- /dev/null +++ b/global_step800/layer_07-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e143a5819cd5158156da447f28d3212b05605fb9004336bcc217360d8a155f +size 9051114 diff --git a/global_step800/layer_08-model_states.pt b/global_step800/layer_08-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bd3c1173441753d48116c0e31170410364b360a --- /dev/null +++ b/global_step800/layer_08-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:940c51159bc7ce23ad0145863ccb24fb2295e4aded2851f6d3f41a17d9bdfaef +size 9051114 diff --git a/global_step800/layer_09-model_states.pt b/global_step800/layer_09-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b28cd50e3849551d11b1dfa8c0312170df469e8 --- /dev/null +++ b/global_step800/layer_09-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac7d4793016d094facb65b42cc707febedeb3239f5ef68383b81626e117670ed +size 9051114 diff --git a/global_step800/layer_10-model_states.pt b/global_step800/layer_10-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ff48c8f01a3b8c9e4f6c865155fe0e52467a6c6 --- /dev/null +++ b/global_step800/layer_10-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a013fc700839e36940f77ba4a4f4d372b66b6bc0b20030cdc9c1495f314df3 +size 9051114 diff --git a/global_step800/layer_11-model_states.pt b/global_step800/layer_11-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0a6f7877676eafa09b036647e09af670a03997d --- /dev/null +++ b/global_step800/layer_11-model_states.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b05f4c54b7f77283313378f3477846d2bf93907ca32c5190f7b0adc05fcf58c2 +size 9051114 diff --git a/global_step800/layer_12-model_states.pt b/global_step800/layer_12-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb2fb8544b46eeec0d35f63087bcd9d4e4012d59 --- /dev/null +++ b/global_step800/layer_12-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3047d60917dc714312e2ad4927e7fbccd161cf752c203eaf7e73feb5bbecf68a +size 9051114 diff --git a/global_step800/layer_13-model_states.pt b/global_step800/layer_13-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e49e15509cf6f80a12bb055a87f421ac1e57bc85 --- /dev/null +++ b/global_step800/layer_13-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2d4ba5e9c3db0c81dda5ff85270b8229c97e21552fe46515c8832a6cd08a334 +size 9051114 diff --git a/global_step800/layer_14-model_states.pt b/global_step800/layer_14-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..142c6ff7b0b34299e91f91c3dfe52dabfdd9246b --- /dev/null +++ b/global_step800/layer_14-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c38227e5a2d4a151804e3f18ff4b06e529700862cd6d8f32382fb019152ea488 +size 9051114 diff --git a/global_step800/layer_15-model_states.pt b/global_step800/layer_15-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..47c17b4ae702d21f29e80cb5bda4bcbbd94cae52 --- /dev/null +++ b/global_step800/layer_15-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64f1dc066f8c4a2fe757b057e4cc6e5501283a814400bcd4a87196f2f5effe8a +size 9051114 diff --git a/global_step800/layer_16-model_states.pt b/global_step800/layer_16-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebdc5b8744d9a7fdb106457946a27d134f0f02bb --- /dev/null +++ b/global_step800/layer_16-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eefbbbba5074f73a5f5c44aab4e5070773d845a7f0fcefaa214e5943aed9584 +size 9051114 diff --git a/global_step800/layer_17-model_states.pt b/global_step800/layer_17-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2a97d0f6b902f12d21a76bc74a735155a6b047b --- /dev/null +++ b/global_step800/layer_17-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a39e1390d125fb9768a225e42868bb532e42342d0d21a266088464d4f7fd934f +size 9051114 diff --git a/global_step800/layer_18-model_states.pt b/global_step800/layer_18-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a77b50974ea0aa85a8181d165efe496240613cc --- /dev/null +++ b/global_step800/layer_18-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8b8686ec1c68c95ce7cdf7a33847524379e3fe0bb78ae98dc1cb2a7f32002f6 +size 9051114 diff --git a/global_step800/layer_19-model_states.pt b/global_step800/layer_19-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f718af8a7d9bda0613f3576d2c63ae9e1d49b32 --- /dev/null +++ b/global_step800/layer_19-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb1103cae664469c4fa8a7d77a1396966ff19e29e178f0aefb9142ca82f5d1a8 +size 9051114 diff --git a/global_step800/layer_20-model_states.pt 
b/global_step800/layer_20-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f609fbddb5a4a732a4dbb7d8dfa6c0861a1786b3 --- /dev/null +++ b/global_step800/layer_20-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bda63ef35461ada0c9e884df9f085b8d4ff66fbaf152e1453374fb587798db17 +size 9051114 diff --git a/global_step800/layer_22-model_states.pt b/global_step800/layer_22-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..90961d3aa2d8846aadadea0fb7b8e9af15582f5a --- /dev/null +++ b/global_step800/layer_22-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:583f65bd15dcc25f3c0b77fac819c42237ee34a027cb24b3087f4f2220090a8f +size 3541780 diff --git a/global_step800/layer_23-model_states.pt b/global_step800/layer_23-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf1d7cea4f6aafb69cb6b7047b633a4c6f81fed8 --- /dev/null +++ b/global_step800/layer_23-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:804d813e2f27e452c9c55eca40b71d02e413dfd33ccf092bdf4b28ea31d08953 +size 3541780 diff --git a/global_step800/layer_24-model_states.pt b/global_step800/layer_24-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..96fcababe8a23bfb13f8f7f2a8fa23f44a7fb054 --- /dev/null +++ b/global_step800/layer_24-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6145c75e129e43187480c8799207076aa562068d63510cd217e7d40eb5a573f +size 3541780 diff --git a/global_step800/layer_25-model_states.pt b/global_step800/layer_25-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2c44462f9da12050a72faccd59f47810fc1bf6c --- /dev/null +++ b/global_step800/layer_25-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2dc3fc82dd8b3513df2c9df2fc2ae599052703b126cb051006039537fb84d9 +size 3541780 diff --git a/global_step800/layer_26-model_states.pt b/global_step800/layer_26-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..55dc2a315d846482af2e0260c5d82b3ff4e599a5 --- /dev/null +++ b/global_step800/layer_26-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d585e3467a8394dbb925cef853559fbc24ccb3ece2d64ec4ed52d686bb0cab93 +size 3541780 diff --git a/global_step800/layer_27-model_states.pt b/global_step800/layer_27-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..645b9be3168ceedc8ca3c12de7cc9054b3c45d7e --- /dev/null +++ b/global_step800/layer_27-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c8200ecc36e5f74840a6273874207245dcf2dfc5edca0d1de99b1b63ac10386 +size 3541780 diff --git a/global_step800/layer_28-model_states.pt b/global_step800/layer_28-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..88ec028d99e6b2ece42608cc5d71f4ed7c5d2b91 --- /dev/null +++ b/global_step800/layer_28-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dbdf7cf9b7bf010798eb51fce308586ce7457c3bd33addd7324bab2504a4b57 +size 3541780 diff --git a/global_step800/layer_29-model_states.pt b/global_step800/layer_29-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e2379affab5a5063780087bd5a6e1d22d12662f --- /dev/null +++ b/global_step800/layer_29-model_states.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09dd30deb6408ff3aed8032e6b48edb199028fd204ea21ba631bf15ef5614ce4 +size 3541780 diff --git a/global_step800/layer_30-model_states.pt b/global_step800/layer_30-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b602a1875e87df90b796f0b2dc221a2e1e3f92f --- /dev/null +++ b/global_step800/layer_30-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7780c1c580c2988f10559f7932478da8f4c7273cedd9819099c2fe281bf97cd +size 3541780 diff --git a/global_step800/layer_31-model_states.pt b/global_step800/layer_31-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..45957ffd35ddbc2d0e87d24fe6154d08ea3b239c --- /dev/null +++ b/global_step800/layer_31-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0737cbbae84b722e0a18db22220d4fa5bc4ea665c1dcfd9cc13ea58546224278 +size 3541780 diff --git a/global_step800/layer_32-model_states.pt b/global_step800/layer_32-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3843155e618180d94cd366bf1eb5954c31543e74 --- /dev/null +++ b/global_step800/layer_32-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f19ac1f9155e7a2965740975106e79e1dc5f67092c391a05d064e8567939a411 +size 3541780 diff --git a/global_step800/layer_33-model_states.pt b/global_step800/layer_33-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b2f2231f519a1f8e24c14037421e57350a2bf4c --- /dev/null +++ b/global_step800/layer_33-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdb18e721b94b371b66afcd25abc65ae6c38237e86f8c05a30b23df346862bbf +size 3541780 diff --git a/global_step800/layer_34-model_states.pt b/global_step800/layer_34-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c407e8bcbd0fd4ec51634289edb4e4c6425792fb --- /dev/null +++ b/global_step800/layer_34-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b2898cd3ae3435cd669f3fb2c80fcba0aa2563f3ef895394afee83e5ed5d9a +size 3541780 diff --git a/global_step800/layer_35-model_states.pt b/global_step800/layer_35-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd7a3d0f29badeb5183d9f8bcc8d45d634dbcf91 --- /dev/null +++ b/global_step800/layer_35-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29a9bcbb5a403dd86e6f6024763837b466337d4496bc13edfdc68f5678b8225 +size 3541780 diff --git a/global_step800/layer_36-model_states.pt b/global_step800/layer_36-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..551b55176d49210df26ac339b789f372611c7cbe --- /dev/null +++ b/global_step800/layer_36-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94a67dfb22318ec9380efe108fbdc347aa17af6f42c9fa7d968dee0a939649af +size 3541780 diff --git a/global_step800/layer_37-model_states.pt b/global_step800/layer_37-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa57c796bb8d60ec54b31c5d2494225286d674d3 --- /dev/null +++ b/global_step800/layer_37-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:031c6aa6fc11f2e10feee525194c080c5b987c5bc4864c6015557b4efd6a491a +size 3541780 diff --git a/global_step800/layer_38-model_states.pt 
b/global_step800/layer_38-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..469063411113592aa24ac98e75b6c22963907aab --- /dev/null +++ b/global_step800/layer_38-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155f61ac54e3e448d7313264a8fe972f93a76d55019f0f119c09e20a1f6f5afc +size 3541780 diff --git a/global_step800/layer_39-model_states.pt b/global_step800/layer_39-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6026dd70a08ed7b05f02e524e85aa8a9cafce076 --- /dev/null +++ b/global_step800/layer_39-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6107d2c492a2d7ddda6a0e1a8dce8d03134bfc30ff98cfc171a315880533b44 +size 3541780 diff --git a/global_step800/layer_40-model_states.pt b/global_step800/layer_40-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3cb95b808dc7c0efd545dc6ae10f923143919cf5 --- /dev/null +++ b/global_step800/layer_40-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02ac3cf5b7399cf27444a1cf952e16035977e802cc106de710ffe0ea4f5f8ee6 +size 3541780 diff --git a/global_step800/layer_41-model_states.pt b/global_step800/layer_41-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..572fc33d418cd24767dd8d2bda6ce09cd6288b91 --- /dev/null +++ b/global_step800/layer_41-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f28d861b9d14d4d1243ab7e0f8fd966abac175535b4a90e72113de49da961441 +size 3541780 diff --git a/global_step800/layer_42-model_states.pt b/global_step800/layer_42-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..05fd195e8fe7e7d7fc7a1ab2616ba54461eb1b47 --- /dev/null +++ b/global_step800/layer_42-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc1c1f6251a3e86fffbfe18ae879e0891d7736568884f8c812fa12c8ea6707d3 +size 3541780 diff --git a/global_step800/layer_43-model_states.pt b/global_step800/layer_43-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce8d4b28c2717e9526d4ed8e2c4221faa61994fc --- /dev/null +++ b/global_step800/layer_43-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fddf4d1be5fc8656043942fbb01548945277bacdf6e46514dfaea0d36d827fa +size 3541780 diff --git a/global_step800/layer_44-model_states.pt b/global_step800/layer_44-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6bc48a1b008b4f3605685ee93f2c041ef46b2d70 --- /dev/null +++ b/global_step800/layer_44-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a998d6c45dbb12f139e2f0ce518a48b3bf22b5ba934fa2cd8a0e80a7482c32d +size 3541780 diff --git a/global_step800/layer_45-model_states.pt b/global_step800/layer_45-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e14cf676ad984b1297c6d8948aa698ac818b417 --- /dev/null +++ b/global_step800/layer_45-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f113e39cf871c47f62f30f3e6dc71a255a4d41599aa800b02d4292cd4347f8d8 +size 3541780 diff --git a/global_step800/layer_46-model_states.pt b/global_step800/layer_46-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc98447ebdfb7afa3c55d19a3a2bf30fcc903965 --- /dev/null +++ b/global_step800/layer_46-model_states.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41bfe551e00152f1b8187c4389c84f2bdab2a963906c17e9beed2113970d3e2a +size 3541780 diff --git a/global_step800/layer_47-model_states.pt b/global_step800/layer_47-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7746093d50d7b7d6b29c767d9e26759aa4229ded --- /dev/null +++ b/global_step800/layer_47-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2581df22d58722cdb38be5b2410bd0513a6dac4932ba5e66766f33a3be4faf2c +size 3541780 diff --git a/global_step800/layer_48-model_states.pt b/global_step800/layer_48-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..306b801c461d69ed895e4b0bb5ffdb15d5120604 --- /dev/null +++ b/global_step800/layer_48-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b602327d7ea039e59617cefe73e0b04d7c39c399d198fd4a18e3aa9b4e75d047 +size 3541780 diff --git a/global_step800/layer_49-model_states.pt b/global_step800/layer_49-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..95272ed5879676a7a879b9816ecd7f8e5db48cb8 --- /dev/null +++ b/global_step800/layer_49-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7bcbd97eac0ab059b718fc98345549ed7daef66853445990bd7a8969f6dbe2 +size 3541780 diff --git a/global_step800/layer_50-model_states.pt b/global_step800/layer_50-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..73169dc8edd83a490051efeb67ff6b5f4532f1e4 --- /dev/null +++ b/global_step800/layer_50-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a06bf9fea0bdca64601bc9fd7fb074b5ddb58cfa52bcbcee23a7de7b90550251 +size 3541780 diff --git a/global_step800/layer_51-model_states.pt b/global_step800/layer_51-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6691155d4416bab037966828b272360d3f3a1fde --- /dev/null +++ b/global_step800/layer_51-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0f8f3277144c6ae7fbab53df14487af2175b8d7ac81829a823e292923ce7351 +size 3541780 diff --git a/global_step800/layer_52-model_states.pt b/global_step800/layer_52-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..25521e9012c9188940855970537b6d995729263c --- /dev/null +++ b/global_step800/layer_52-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24ec319923f3129e1a25a3f675f5d750fd300eec29fa287007cc9049313e83d2 +size 3541780 diff --git a/global_step800/layer_53-model_states.pt b/global_step800/layer_53-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9af936e8fc13270475f6297c4d2f07c4562ef61 --- /dev/null +++ b/global_step800/layer_53-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5e5bb1716f1597722c57a2123ac314f1e38886001ff9d4e3486214eb25db502 +size 3541780 diff --git a/global_step800/layer_54-model_states.pt b/global_step800/layer_54-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..888c53e2fb728b2b81532eded825545ec650a9e2 --- /dev/null +++ b/global_step800/layer_54-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2053bca11d01932fecbc914c1190454edbc14531caa870df4385a26666a5f708 +size 3541780 diff --git a/global_step800/layer_55-model_states.pt 
b/global_step800/layer_55-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7405219113927e56a237a39595c7a98bbb9700e7 --- /dev/null +++ b/global_step800/layer_55-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db545fb04222c712c820784925e05cea141dfea436552bd11bb24c9a150fac8e +size 3541780 diff --git a/global_step800/layer_56-model_states.pt b/global_step800/layer_56-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bafe242c113f8dea59415d22eb14fd07f5edfc57 --- /dev/null +++ b/global_step800/layer_56-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3586448a976c83a0f67bbb054a0d45a414f40bca58af9908dd12c24544fac4c +size 3541780 diff --git a/global_step800/layer_57-model_states.pt b/global_step800/layer_57-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ab793135edc83378e704e37360a501ab50f4f00 --- /dev/null +++ b/global_step800/layer_57-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18995090265180db4386ce8b588f3c638708314b2de629d86e346bdb214de12b +size 3541780 diff --git a/global_step800/layer_58-model_states.pt b/global_step800/layer_58-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..348cb194ddf5880b3a1ec7934eb287e20828c8df --- /dev/null +++ b/global_step800/layer_58-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06731d3ee15880fd62dfd34ddda3e52c09259dcd90edd1b18c6cd7122a128b37 +size 3541780 diff --git a/global_step800/layer_59-model_states.pt b/global_step800/layer_59-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2197093fd796fc8ea47792fea96d15c16c6d3716 --- /dev/null +++ b/global_step800/layer_59-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:783a1d5222b3d779e9f8c10124f677d3578b112ff924079ae9100c237f24d24d +size 3541780 diff --git a/global_step800/layer_60-model_states.pt b/global_step800/layer_60-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..79b7a3949ebe74f16d557fccbe52373aa21fc3d3 --- /dev/null +++ b/global_step800/layer_60-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a9a5ccd78fb9a8ca0c08e6a123174af60dc24458883bf3eb5fb2376e4852433 +size 3541780 diff --git a/global_step800/layer_61-model_states.pt b/global_step800/layer_61-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..abb0d3a1c817f5853fc430f9232e20e74cafffd3 --- /dev/null +++ b/global_step800/layer_61-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93b7d6a8450fe11cd1a6f22ce749cfc6ab14e01406c089adf00581a63945ccde +size 3541780 diff --git a/global_step800/layer_62-model_states.pt b/global_step800/layer_62-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..40aef7e44c4d6be7b419c67a0587c9a568c0168e --- /dev/null +++ b/global_step800/layer_62-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:400f9de1bb694fb2ea95724840a27c2abe0d3e3d8432a1075b589745a33fed8f +size 920 diff --git a/global_step800/mp_rank_00_model_states.pt b/global_step800/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fb95212519ece1fe5747318742daf7f5a5192d4 --- /dev/null +++ b/global_step800/mp_rank_00_model_states.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f446304111d3154b93f99ffca22a9aa7b6ff1ddca9ce500b7d9312f8598f2b7e +size 967904473 diff --git a/hunyuan_config.toml b/hunyuan_config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b31e757f581205f5448ba141af2dfee449d7ef45 --- /dev/null +++ b/hunyuan_config.toml @@ -0,0 +1,94 @@
+# Output path for training runs. Each training run makes a new directory in here.
+output_dir = '/notebooks/diffusion-pipe/output'
+
+# Dataset config file.
+dataset = '/notebooks/diffusion-pipe/dataset_files/dataset_config.toml'
+# You can have separate eval datasets. Give them a name for Tensorboard metrics.
+# eval_datasets = [
+# {name = 'something', config = 'path/to/eval_dataset.toml'},
+# ]
+
+# training settings
+
+# I usually set this to a really high value because I don't know how long I want to train.
+epochs = 1000
+# Batch size of a single forward/backward pass for one GPU.
+micro_batch_size_per_gpu = 1
+# Pipeline parallelism degree. A single instance of the model is divided across this many GPUs.
+pipeline_stages = 1
+# Number of micro-batches sent through the pipeline for each training step.
+# If pipeline_stages > 1, a higher GAS means better GPU utilization due to smaller pipeline bubbles (where GPUs aren't overlapping computation).
+gradient_accumulation_steps = 4
+# Grad norm clipping.
+gradient_clipping = 1.0
+# Learning rate warmup.
+warmup_steps = 100
+
+# eval settings
+
+eval_every_n_epochs = 1
+eval_before_first_step = true
+# Might want to set these lower for eval so that less images get dropped (eval dataset size is usually much smaller than training set).
+# Each size bucket of images/videos is rounded down to the nearest multiple of the global batch size, so higher global batch size means
+# more dropped images. Usually doesn't matter for training but the eval set is much smaller so it can matter.
+eval_micro_batch_size_per_gpu = 1
+eval_gradient_accumulation_steps = 1
+
+# misc settings
+
+# Probably want to set this a bit higher if you have a smaller dataset so you don't end up with a million saved models.
+save_every_n_epochs = 4
+# Can checkpoint the training state every n number of epochs or minutes. Set only one of these. You can resume from checkpoints using the --resume_from_checkpoint flag.
+#checkpoint_every_n_epochs = 1
+checkpoint_every_n_minutes = 120
+# Always set to true unless you have a huge amount of VRAM.
+activation_checkpointing = true
+# Controls how Deepspeed decides how to divide layers across GPUs. Probably don't change this.
+partition_method = 'parameters'
+# dtype for saving the LoRA or model, if different from training dtype
+save_dtype = 'bfloat16'
+# Batch size for caching latents and text embeddings. Increasing can lead to higher GPU utilization during caching phase but uses more memory.
+caching_batch_size = 1
+# How often deepspeed logs to console.
+steps_per_print = 1
+# How to extract video clips for training from a single input video file.
+# The video file is first assigned to one of the configured frame buckets, but then we must extract one or more clips of exactly the right
+# number of frames for that bucket.
+# single_beginning: one clip starting at the beginning of the video
+# single_middle: one clip from the middle of the video (cutting off the start and end equally)
+# multiple_overlapping: extract the minimum number of clips to cover the full range of the video. They might overlap some.
+# default is single_middle
+video_clip_mode = 'single_middle'
+
+[model]
+type = 'hunyuan-video'
+# Can load Hunyuan Video entirely from the ckpt path set up for the official inference scripts.
+#ckpt_path = '/home/anon/HunyuanVideo/ckpts'
+# Or you can load it by pointing to all the ComfyUI files.
+transformer_path = '/notebooks/diffusion-pipe/hunyuan_model_files/hunyuan_dit.safetensors'
+vae_path = '/notebooks/diffusion-pipe/hunyuan_model_files/vae.safetensors'
+llm_path = '/notebooks/diffusion-pipe/hunyuan_model_files/llava-llama-3-8b-text-encoder-tokenizer/'
+clip_path = '/notebooks/diffusion-pipe/hunyuan_model_files/clip-vit-large-patch14/'
+# Base dtype used for all models.
+dtype = 'bfloat16'
+# Hunyuan Video supports fp8 for the transformer when training LoRA.
+transformer_dtype = 'float8'
+# How to sample timesteps to train on. Can be logit_normal or uniform.
+timestep_sample_method = 'logit_normal'
+
+[adapter]
+type = 'lora'
+rank = 32
+# Dtype for the LoRA weights you are training.
+dtype = 'bfloat16'
+# You can initialize the lora weights from a previously trained lora.
+#init_from_existing = '/data/diffusion_pipe_training_runs/something/epoch50'
+
+[optimizer]
+# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights.
+# Look at train.py for other options. You could also easily edit the file and add your own.
+type = 'adamw_optimi'
+lr = 2e-5
+betas = [0.9, 0.99]
+weight_decay = 0.01
+eps = 1e-8
\ No newline at end of file
diff --git a/latest b/latest new file mode 100644 index 0000000000000000000000000000000000000000..d56b300780898882dcba14d553cda6019e516aae --- /dev/null +++ b/latest @@ -0,0 +1 @@
+global_step1597
\ No newline at end of file
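
For anyone poking at this dump: the `latest` file above is the tag DeepSpeed reads to decide which `global_step*` directory a resumed run should pick up (resuming is done with the `--resume_from_checkpoint` flag mentioned in the config comments). Below is a minimal sketch for checking locally what the tag points at; it assumes the repo was cloned with git-lfs so the pointer files are materialized, and the script name and layout checks are mine, not part of diffusion-pipe.

```python
# inspect_checkpoint.py -- sketch only; assumes this repo is the working directory
# and that git-lfs has pulled the real checkpoint shards (not just the pointers).
from pathlib import Path

repo = Path(".")
tag = (repo / "latest").read_text().strip()        # e.g. "global_step1597"
step_dir = repo / tag

# DeepSpeed saves one file per pipeline layer plus a per-rank model-states file.
layer_shards = sorted(step_dir.glob("layer_*-model_states.pt"))
rank_shards = sorted(step_dir.glob("mp_rank_*_model_states.pt"))

total_bytes = sum(p.stat().st_size for p in layer_shards + rank_shards)
print(f"checkpoint tag: {tag}")
print(f"{len(layer_shards)} layer shards, {len(rank_shards)} rank shard(s)")
print(f"total size on disk: {total_bytes / 2**30:.2f} GiB")
```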
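Separately, the LoRA weights this run saves per epoch (the repo's epochNN folders, e.g. epoch100/) can be sanity-checked against the `[adapter]` block above without loading HunyuanVideo itself. A hedged sketch using only `json` and `safetensors`; the `adapter_config.json` + `adapter_model.safetensors` file names follow the standard PEFT layout, which is an assumption about this repo rather than something stated in the config.

```python
# verify_lora.py -- minimal sketch, not part of diffusion-pipe.
import json
from pathlib import Path

from safetensors.torch import load_file  # pip install safetensors (needs torch)

epoch_dir = Path("epoch100")  # any saved epochNN directory in this repo
cfg = json.loads((epoch_dir / "adapter_config.json").read_text())
state = load_file(str(epoch_dir / "adapter_model.safetensors"))

print(f"peft_type={cfg['peft_type']}  r={cfg['r']}  alpha={cfg['lora_alpha']}")
print(f"{len(state)} LoRA tensors, "
      f"{sum(t.numel() for t in state.values()) / 1e6:.1f}M trainable params")

# In PEFT's layout each lora_A weight has r rows, so every one of them should
# match the rank declared in the config (rank = 32 above).
bad = [k for k, t in state.items() if "lora_A" in k and t.shape[0] != cfg["r"]]
print("rank mismatches:", bad or "none")
```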