Neil Wang
committed on
Commit
•
364cc0a
1
Parent(s):
000c736
768_10k
Browse files
- args.json +3 -3
- samples/0/0.png +0 -0
- samples/0/1.png +0 -0
- samples/0/2.png +0 -0
- samples/0/3.png +0 -0
- train_inpainting_dreambooth.py +31 -20
- unet/diffusion_pytorch_model.safetensors +1 -1
args.json
CHANGED
@@ -15,14 +15,14 @@
|
|
15 |
"with_prior_preservation": false,
|
16 |
"prior_loss_weight": 1.0,
|
17 |
"num_class_images": 100,
|
18 |
-
"output_dir": "./models/
|
19 |
"seed": 3434554,
|
20 |
"resolution": 512,
|
21 |
"center_crop": false,
|
22 |
"train_text_encoder": false,
|
23 |
"train_batch_size": 4,
|
24 |
"sample_batch_size": 2,
|
25 |
-
"num_train_epochs":
|
26 |
"max_train_steps": 10000,
|
27 |
"gradient_accumulation_steps": 1,
|
28 |
"gradient_checkpointing": false,
|
@@ -51,7 +51,7 @@
|
|
51 |
{
|
52 |
"instance_prompt": "photo of zwx dog",
|
53 |
"class_prompt": "photo of a dog",
|
54 |
-
"instance_data_dir": "
|
55 |
"class_data_dir": "../../../data/dog"
|
56 |
}
|
57 |
]
|
|
|
15 |
"with_prior_preservation": false,
|
16 |
"prior_loss_weight": 1.0,
|
17 |
"num_class_images": 100,
|
18 |
+
"output_dir": "./models/768",
|
19 |
"seed": 3434554,
|
20 |
"resolution": 512,
|
21 |
"center_crop": false,
|
22 |
"train_text_encoder": false,
|
23 |
"train_batch_size": 4,
|
24 |
"sample_batch_size": 2,
|
25 |
+
"num_train_epochs": 26,
|
26 |
"max_train_steps": 10000,
|
27 |
"gradient_accumulation_steps": 1,
|
28 |
"gradient_checkpointing": false,
|
|
|
51 |
{
|
52 |
"instance_prompt": "photo of zwx dog",
|
53 |
"class_prompt": "photo of a dog",
|
54 |
+
"instance_data_dir": "/home/neil/Documents/dataset_combined/combined",
|
55 |
"class_data_dir": "../../../data/dog"
|
56 |
}
|
57 |
]
|
samples/0/0.png
CHANGED
samples/0/1.png
CHANGED
samples/0/2.png
CHANGED
samples/0/3.png
CHANGED
train_inpainting_dreambooth.py
CHANGED
@@ -290,10 +290,6 @@ def generate_random_mask(image):
|
|
290 |
# print('foobar', mask.shape)
|
291 |
mask = image[-1].unsqueeze(0)
|
292 |
# torchvision.transforms.functional.to_pil_image(mask).save('foomask2.png')
|
293 |
-
|
294 |
-
# if random.uniform(0, 1) < 0.25:
|
295 |
-
# mask = torch.zeros_like(image[:1])
|
296 |
-
# mask.fill_(1.)
|
297 |
# print('foobar',image.shape)
|
298 |
# torchvision.transforms.functional.to_pil_image(image[:4]).save('fooimageinside2.png')
|
299 |
# print('foobar',image[:3].shape)
|
@@ -301,16 +297,16 @@ def generate_random_mask(image):
|
|
301 |
# torchvision.transforms.functional.to_pil_image(masked_image).save('foomaskedimage2.png')
|
302 |
return mask, masked_image
|
303 |
|
304 |
-
image_transforms = transforms.Compose(
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
)
|
314 |
# instance_image = Image.open("/home/neil/Documents/diffusers/examples/dreambooth/512_/a corner sofa in midnight blue in a room with vibrant walls and eclectic artwork, creating a lively and energetic atmosphere. .png")
|
315 |
# alpha = instance_image.split()[-1]
|
316 |
# instance_image.putalpha(ImageOps.invert(alpha))
|
@@ -386,7 +382,7 @@ class DreamBoothDataset(Dataset):
|
|
386 |
self.image_transforms = transforms.Compose(
|
387 |
[
|
388 |
torchvision.transforms.RandomHorizontalFlip(0.5 * hflip),
|
389 |
-
transforms.RandomResizedCrop(size, scale=(0.75,1.0)),
|
390 |
torchvision.transforms.ToTensor(),
|
391 |
torchvision.transforms.Normalize([0.5], [0.5]),
|
392 |
]
|
@@ -399,18 +395,33 @@ class DreamBoothDataset(Dataset):
|
|
399 |
example = {}
|
400 |
instance_path, instance_prompt = self.instance_images_path[index % self.num_instance_images]
|
401 |
instance_image = Image.open(instance_path)
|
402 |
-
alpha = instance_image.split()[-1]
|
403 |
-
|
404 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
405 |
example["instance_images"] = self.image_transforms(instance_image)
|
|
|
406 |
example["instance_masks"], example["instance_masked_images"] = generate_random_mask(example["instance_images"])
|
407 |
# torchvision.transforms.functional.to_pil_image(example["instance_masks"][0]).save('foo_instance_masks.png')
|
408 |
# torchvision.transforms.functional.to_pil_image(example["instance_masked_images"][0]).save('foo_instance_masked_images.png')
|
409 |
example["instance_images"] = example["instance_images"][:3]
|
410 |
# torchvision.transforms.functional.to_pil_image(example["instance_images"]).save('foo_instance_image.png')
|
|
|
411 |
example["instance_prompt_ids"] = self.tokenizer(
|
412 |
# instance_prompt,
|
413 |
-
os.path.basename(
|
414 |
padding="max_length" if self.pad_tokens else "do_not_pad",
|
415 |
truncation=True,
|
416 |
max_length=self.tokenizer.model_max_length,
|
@@ -884,7 +895,7 @@ def main(args):
|
|
884 |
# Add noise to the latents according to the noise magnitude at each timestep
|
885 |
# (this is the forward diffusion process)
|
886 |
noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
|
887 |
-
|
888 |
# Get the text embedding for conditioning
|
889 |
with text_enc_context:
|
890 |
if not args.not_cache_latents:
|
|
|
290 |
# print('foobar', mask.shape)
|
291 |
mask = image[-1].unsqueeze(0)
|
292 |
# torchvision.transforms.functional.to_pil_image(mask).save('foomask2.png')
|
|
|
|
|
|
|
|
|
293 |
# print('foobar',image.shape)
|
294 |
# torchvision.transforms.functional.to_pil_image(image[:4]).save('fooimageinside2.png')
|
295 |
# print('foobar',image[:3].shape)
|
|
|
297 |
# torchvision.transforms.functional.to_pil_image(masked_image).save('foomaskedimage2.png')
|
298 |
return mask, masked_image
|
299 |
|
300 |
+
# image_transforms = transforms.Compose(
|
301 |
+
# [
|
302 |
+
# torchvision.transforms.RandomHorizontalFlip(0.5 * True),
|
303 |
+
# # transforms.Resize(size, interpolation=transforms.InterpolationMode.BILINEAR),
|
304 |
+
# # transforms.CenterCrop(size) if center_crop else transforms.RandomCrop(size),
|
305 |
+
# transforms.RandomResizedCrop(512, scale=(0.75,1.0)),
|
306 |
+
# torchvision.transforms.ToTensor(),
|
307 |
+
# torchvision.transforms.Normalize([0.5], [0.5]),
|
308 |
+
# ]
|
309 |
+
# )
|
310 |
# instance_image = Image.open("/home/neil/Documents/diffusers/examples/dreambooth/512_/a corner sofa in midnight blue in a room with vibrant walls and eclectic artwork, creating a lively and energetic atmosphere. .png")
|
311 |
# alpha = instance_image.split()[-1]
|
312 |
# instance_image.putalpha(ImageOps.invert(alpha))
|
|
|
382 |
self.image_transforms = transforms.Compose(
|
383 |
[
|
384 |
torchvision.transforms.RandomHorizontalFlip(0.5 * hflip),
|
385 |
+
# transforms.RandomResizedCrop(size, scale=(0.75,1.0)),
|
386 |
torchvision.transforms.ToTensor(),
|
387 |
torchvision.transforms.Normalize([0.5], [0.5]),
|
388 |
]
|
|
|
395 |
example = {}
|
396 |
instance_path, instance_prompt = self.instance_images_path[index % self.num_instance_images]
|
397 |
instance_image = Image.open(instance_path)
|
398 |
+
# alpha = instance_image.split()[-1]
|
399 |
+
# !!!! for this dataset already inverted
|
400 |
+
# instance_image.putalpha(ImageOps.invert(alpha))
|
401 |
+
# instance_image.putalpha(alpha)
|
402 |
+
|
403 |
+
# if random.uniform(0, 1) < 0.25:
|
404 |
+
# # print(instance_path)
|
405 |
+
# # print(type(instance_path))
|
406 |
+
# norm_path = str(instance_path).replace('/combined/','/combined_norm/')
|
407 |
+
# norm_path = norm_path.split('_')
|
408 |
+
# instance_image = Image.open(Path("_".join(norm_path[:-5])+'_'+norm_path[-1]))
|
409 |
+
# instance_image.putalpha(Image.new('L',instance_image.size,(255)))
|
410 |
+
# # prior = True
|
411 |
+
# # mask = torch.zeros_like(image[:1])
|
412 |
+
# # mask.fill_(1.)
|
413 |
+
|
414 |
example["instance_images"] = self.image_transforms(instance_image)
|
415 |
+
# using my bastardized prior preservation
|
416 |
example["instance_masks"], example["instance_masked_images"] = generate_random_mask(example["instance_images"])
|
417 |
# torchvision.transforms.functional.to_pil_image(example["instance_masks"][0]).save('foo_instance_masks.png')
|
418 |
# torchvision.transforms.functional.to_pil_image(example["instance_masked_images"][0]).save('foo_instance_masked_images.png')
|
419 |
example["instance_images"] = example["instance_images"][:3]
|
420 |
# torchvision.transforms.functional.to_pil_image(example["instance_images"]).save('foo_instance_image.png')
|
421 |
+
# print(os.path.splitext(os.path.basename(instance_path))[0])
|
422 |
example["instance_prompt_ids"] = self.tokenizer(
|
423 |
# instance_prompt,
|
424 |
+
os.path.splitext(os.path.basename(instance_path))[0],
|
425 |
padding="max_length" if self.pad_tokens else "do_not_pad",
|
426 |
truncation=True,
|
427 |
max_length=self.tokenizer.model_max_length,
|
|
|
895 |
# Add noise to the latents according to the noise magnitude at each timestep
|
896 |
# (this is the forward diffusion process)
|
897 |
noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
|
898 |
+
# breakpoint()
|
899 |
# Get the text embedding for conditioning
|
900 |
with text_enc_context:
|
901 |
if not args.not_cache_latents:
|
unet/diffusion_pytorch_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1719154104
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09ff49c01ca2f73f52eb4c028a250767ebe279c994f1d82fb00576985cf625f4
|
3 |
size 1719154104
|