Neil Wang committed
Commit 364cc0a
1 Parent(s): 000c736
args.json CHANGED
@@ -15,14 +15,14 @@
   "with_prior_preservation": false,
   "prior_loss_weight": 1.0,
   "num_class_images": 100,
-  "output_dir": "./models/outgrowths_7",
+  "output_dir": "./models/768",
   "seed": 3434554,
   "resolution": 512,
   "center_crop": false,
   "train_text_encoder": false,
   "train_batch_size": 4,
   "sample_batch_size": 2,
-  "num_train_epochs": 176,
+  "num_train_epochs": 26,
   "max_train_steps": 10000,
   "gradient_accumulation_steps": 1,
   "gradient_checkpointing": false,
@@ -51,7 +51,7 @@
     {
       "instance_prompt": "photo of zwx dog",
       "class_prompt": "photo of a dog",
-      "instance_data_dir": "./512",
+      "instance_data_dir": "/home/neil/Documents/dataset_combined/combined",
       "class_data_dir": "../../../data/dog"
     }
   ]
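Note: `instance_data_dir` now points at a much larger combined dataset, and `num_train_epochs` drops from 176 to 26 while `max_train_steps` stays at 10000 — consistent with the epoch count being back-computed from the dataset size rather than set by hand, as the stock diffusers DreamBooth script does. A minimal sketch of that relationship; the dataset sizes (228 and 1560) are hypothetical values back-solved to make the arithmetic line up, not counts taken from this repo:

import math

def epochs_for_steps(max_train_steps: int, num_images: int,
                     batch_size: int, grad_accum: int = 1) -> int:
    # One optimizer update per batch; an epoch is one pass over the instance images.
    steps_per_epoch = math.ceil(num_images / (batch_size * grad_accum))
    return math.ceil(max_train_steps / steps_per_epoch)

# Hypothetical sizes: ~228 images in ./512, ~1560 in the combined set.
print(epochs_for_steps(10000, 228, 4))   # 176
print(epochs_for_steps(10000, 1560, 4))  # 26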
samples/0/0.png CHANGED
samples/0/1.png CHANGED
samples/0/2.png CHANGED
samples/0/3.png CHANGED
train_inpainting_dreambooth.py CHANGED
@@ -290,10 +290,6 @@ def generate_random_mask(image):
     # print('foobar', mask.shape)
     mask = image[-1].unsqueeze(0)
     # torchvision.transforms.functional.to_pil_image(mask).save('foomask2.png')
-
-    # if random.uniform(0, 1) < 0.25:
-    #     mask = torch.zeros_like(image[:1])
-    #     mask.fill_(1.)
     # print('foobar',image.shape)
     # torchvision.transforms.functional.to_pil_image(image[:4]).save('fooimageinside2.png')
     # print('foobar',image[:3].shape)
@@ -301,16 +297,16 @@ def generate_random_mask(image):
     # torchvision.transforms.functional.to_pil_image(masked_image).save('foomaskedimage2.png')
     return mask, masked_image
 
-image_transforms = transforms.Compose(
-    [
-        torchvision.transforms.RandomHorizontalFlip(0.5 * True),
-        # transforms.Resize(size, interpolation=transforms.InterpolationMode.BILINEAR),
-        # transforms.CenterCrop(size) if center_crop else transforms.RandomCrop(size),
-        transforms.RandomResizedCrop(512, scale=(0.75,1.0)),
-        torchvision.transforms.ToTensor(),
-        torchvision.transforms.Normalize([0.5], [0.5]),
-    ]
-)
+# image_transforms = transforms.Compose(
+#     [
+#         torchvision.transforms.RandomHorizontalFlip(0.5 * True),
+#         # transforms.Resize(size, interpolation=transforms.InterpolationMode.BILINEAR),
+#         # transforms.CenterCrop(size) if center_crop else transforms.RandomCrop(size),
+#         transforms.RandomResizedCrop(512, scale=(0.75,1.0)),
+#         torchvision.transforms.ToTensor(),
+#         torchvision.transforms.Normalize([0.5], [0.5]),
+#     ]
+# )
 # instance_image = Image.open("/home/neil/Documents/diffusers/examples/dreambooth/512_/a corner sofa in midnight blue in a room with vibrant walls and eclectic artwork, creating a lively and energetic atmosphere. .png")
 # alpha = instance_image.split()[-1]
 # instance_image.putalpha(ImageOps.invert(alpha))
@@ -386,7 +382,7 @@ class DreamBoothDataset(Dataset):
         self.image_transforms = transforms.Compose(
             [
                 torchvision.transforms.RandomHorizontalFlip(0.5 * hflip),
-                transforms.RandomResizedCrop(size, scale=(0.75,1.0)),
+                # transforms.RandomResizedCrop(size, scale=(0.75,1.0)),
                 torchvision.transforms.ToTensor(),
                 torchvision.transforms.Normalize([0.5], [0.5]),
             ]
@@ -399,18 +395,33 @@
         example = {}
         instance_path, instance_prompt = self.instance_images_path[index % self.num_instance_images]
         instance_image = Image.open(instance_path)
-        alpha = instance_image.split()[-1]
-        instance_image.putalpha(ImageOps.invert(alpha))
-
+        # alpha = instance_image.split()[-1]
+        # !!!! for this dataset already inverted
+        # instance_image.putalpha(ImageOps.invert(alpha))
+        # instance_image.putalpha(alpha)
+
+        # if random.uniform(0, 1) < 0.25:
+        #     # print(instance_path)
+        #     # print(type(instance_path))
+        #     norm_path = str(instance_path).replace('/combined/','/combined_norm/')
+        #     norm_path = norm_path.split('_')
+        #     instance_image = Image.open(Path("_".join(norm_path[:-5])+'_'+norm_path[-1]))
+        #     instance_image.putalpha(Image.new('L',instance_image.size,(255)))
+        #     # prior = True
+        #     # mask = torch.zeros_like(image[:1])
+        #     # mask.fill_(1.)
+
         example["instance_images"] = self.image_transforms(instance_image)
+        # using my bastardized prior preservation
         example["instance_masks"], example["instance_masked_images"] = generate_random_mask(example["instance_images"])
         # torchvision.transforms.functional.to_pil_image(example["instance_masks"][0]).save('foo_instance_masks.png')
         # torchvision.transforms.functional.to_pil_image(example["instance_masked_images"][0]).save('foo_instance_masked_images.png')
         example["instance_images"] = example["instance_images"][:3]
         # torchvision.transforms.functional.to_pil_image(example["instance_images"]).save('foo_instance_image.png')
+        # print(os.path.splitext(os.path.basename(instance_path))[0])
         example["instance_prompt_ids"] = self.tokenizer(
             # instance_prompt,
-            os.path.basename(instance_prompt),
+            os.path.splitext(os.path.basename(instance_path))[0],
             padding="max_length" if self.pad_tokens else "do_not_pad",
             truncation=True,
             max_length=self.tokenizer.model_max_length,
@@ -884,7 +895,7 @@ def main(args):
                 # Add noise to the latents according to the noise magnitude at each timestep
                 # (this is the forward diffusion process)
                 noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
-
+                # breakpoint()
                 # Get the text embedding for conditioning
                 with text_enc_context:
                     if not args.not_cache_latents:
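Note: the net effect of these hunks is that the alpha channel of each RGBA instance image is used directly as the inpainting mask (the combined dataset ships with masks already inverted, so the `ImageOps.invert` step is commented out), and the random-crop augmentation is disabled. A minimal sketch of the mask path, assuming a normalized 4-channel tensor; the `masked_image` line is an assumption modeled on the stock diffusers inpainting example, since that computation sits outside the visible hunks:

import torch

def alpha_channel_mask(image: torch.Tensor):
    # image: [4, H, W] RGBA tensor; the alpha channel already encodes
    # the region to inpaint (no inversion needed for this dataset).
    mask = image[-1].unsqueeze(0)             # [1, H, W]
    masked_image = image[:3] * (mask < 0.5)   # assumed: zero out RGB under the mask
    return mask, masked_image

The caption also changes: the tokenizer now receives the image's file name with the extension stripped, rather than `os.path.basename(instance_prompt)`. With a hypothetical file from the combined dataset:

import os

path = "/home/neil/Documents/dataset_combined/combined/a corner sofa in midnight blue_3.png"  # hypothetical
caption = os.path.splitext(os.path.basename(path))[0]
print(caption)  # "a corner sofa in midnight blue_3" -- the file name becomes the prompt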
unet/diffusion_pytorch_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:057028cbeb1ca5aacd36fc81ed7f2c5ae063330d4bfd37c1a42b63cc77d0e50d
+oid sha256:09ff49c01ca2f73f52eb4c028a250767ebe279c994f1d82fb00576985cf625f4
 size 1719154104
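Note: the unet weights live in Git LFS, so the diff only touches the pointer file: `oid` is the SHA-256 of the actual weights, and `size` is unchanged because the tensor shapes are the same. A minimal sketch for checking that a pulled file matches the new pointer, assuming the weights have been fetched locally:

import hashlib

def file_sha256(path: str, chunk: int = 1 << 20) -> str:
    # Stream the file in 1 MiB chunks to avoid loading 1.7 GB into memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while blob := f.read(chunk):
            h.update(blob)
    return h.hexdigest()

# Expect 09ff49c0...cf625f4 for the post-commit weights (1719154104 bytes).
print(file_sha256("unet/diffusion_pytorch_model.safetensors"))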