ashawkey committed
Commit 6875ba9 · Parent: a9e51b3
Files changed (4)
  1. main.py +1 -1
  2. nerf/renderer.py +1 -1
  3. nerf/sd.py +5 -3
  4. readme.md +10 -2
main.py CHANGED
@@ -28,7 +28,7 @@ if __name__ == '__main__':
     parser.add_argument('--ckpt', type=str, default='latest')
     parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
     parser.add_argument('--max_steps', type=int, default=1024, help="max num steps sampled per ray (only valid when using --cuda_ray)")
-    parser.add_argument('--num_steps', type=int, default=256, help="num steps sampled per ray (only valid when not using --cuda_ray)")
+    parser.add_argument('--num_steps', type=int, default=128, help="num steps sampled per ray (only valid when not using --cuda_ray)")
     parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when not using --cuda_ray)")
     parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only valid when using --cuda_ray)")
     parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when not using --cuda_ray)")
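For context on the default change: `--num_steps` is the number of uniform samples taken along each ray on the PyTorch (non-`--cuda_ray`) path, so halving it from 256 to 128 halves the MLP queries per ray. A minimal sketch of that kind of uniform sampling; the function and variable names (`sample_uniform`, `rays_o`, `rays_d`, `near`, `far`) are illustrative, not the repo's exact code:

```python
import torch

def sample_uniform(rays_o, rays_d, near, far, num_steps=128):
    # evenly spaced depths in [near, far], shared across all rays: [num_steps]
    z_vals = torch.linspace(0.0, 1.0, num_steps, device=rays_o.device)
    z_vals = near + (far - near) * z_vals
    # sample positions along each ray: [N, num_steps, 3]
    pts = rays_o.unsqueeze(1) + rays_d.unsqueeze(1) * z_vals.view(1, -1, 1)
    return pts, z_vals
```

Since the per-step cost is one network query per sample, the 5h --> 2.5h speedup quoted in the readme below is consistent with this linear cost in `num_steps`.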
nerf/renderer.py CHANGED
@@ -271,7 +271,7 @@ class NeRFRenderer(nn.Module):
 
         print(f'[INFO] writing obj mesh to {obj_file}')
         with open(obj_file, "w") as fp:
-            fp.write(f'mtllib {name}.mtl \n')
+            fp.write(f'mtllib {name}mesh.mtl \n')
 
         print(f'[INFO] writing vertices {v_np.shape}')
         for v in v_np:
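A note on why this one-line change matters: the `mtllib` statement inside an OBJ file must name the `.mtl` file that is actually written next to it, otherwise viewers load the geometry but silently drop the material. A hedged sketch of the pairing, with illustrative filenames (not the repo's exact output paths):

```python
# minimal OBJ/MTL pair; 'mesh' is an illustrative base name
with open('mesh.mtl', 'w') as fp:
    fp.write('newmtl mat0\n')        # material name referenced by the OBJ
    fp.write('map_Kd albedo.png\n')  # diffuse texture map

with open('mesh.obj', 'w') as fp:
    fp.write('mtllib mesh.mtl\n')    # must match the .mtl filename on disk
    fp.write('usemtl mat0\n')        # must match the newmtl name above
    fp.write('v 0 0 0\nv 1 0 0\nv 0 1 0\n')
    fp.write('f 1 2 3\n')
```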
nerf/sd.py CHANGED
@@ -41,6 +41,7 @@ class StableDiffusion(nn.Module):
 
         # 4. Create a scheduler for inference
         self.scheduler = PNDMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=self.num_train_timesteps)
+        self.alphas = self.scheduler.alphas_cumprod.to(self.device) # for convenience
 
         print(f'[INFO] loaded stable diffusion!')
 
@@ -93,8 +94,9 @@ class StableDiffusion(nn.Module):
         noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
         noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
 
-        # w(t), one_minus_alpha_prod, i.e., sigma^2
-        w = (1 - self.scheduler.alphas_cumprod[t]).to(self.device)
+        # w(t), alpha_t * sigma_t^2
+        # w = (1 - self.alphas[t])
+        w = self.alphas[t] ** 0.5 * (1 - self.alphas[t])
         grad = w * (noise_pred - noise)
 
         # clip grad for stable training?
@@ -105,7 +107,7 @@
         latents.backward(gradient=grad, retain_graph=True)
         # torch.cuda.synchronize(); print(f'[TIME] guiding: backward {time.time() - _t:.4f}s')
 
-        return 0 # fake loss value
+        return 0 # dummy loss value
 
     def produce_latents(self, text_embeddings, height=512, width=512, num_inference_steps=50, guidance_scale=7.5, latents=None):
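For reference, the weighting touched here is the w(t) in the score distillation sampling (SDS) gradient, grad_theta L = w(t) * (eps_hat(z_t; y, t) - eps) * dz/dtheta. In the alpha_t/sigma_t notation where z_t = alpha_t * z + sigma_t * eps (so alpha_t = sqrt(alphas_cumprod[t])), the new weight is w(t) = alpha_t * sigma_t^2 = sqrt(alphas_cumprod[t]) * (1 - alphas_cumprod[t]), which is exactly what the code computes. Because the gradient is injected directly via `latents.backward(gradient=grad)`, there is no scalar loss to return, hence the dummy value. A condensed, hedged sketch of the surrounding logic (the timestep range and the standalone `sds_step` function are schematic, not the repo's exact `train_step`):

```python
import torch

def sds_step(unet, scheduler, alphas, latents, text_embeddings, guidance_scale):
    # sample a timestep away from the extremes (schematic range)
    t = torch.randint(20, 980, (1,), device=latents.device)
    noise = torch.randn_like(latents)
    latents_noisy = scheduler.add_noise(latents, noise, t)

    # classifier-free guidance: uncond + text-conditioned passes in one batch
    latent_input = torch.cat([latents_noisy] * 2)
    noise_pred = unet(latent_input, t, encoder_hidden_states=text_embeddings).sample
    noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

    # w(t) = alpha_t * sigma_t^2 = sqrt(alphas_cumprod) * (1 - alphas_cumprod)
    w = alphas[t] ** 0.5 * (1 - alphas[t])
    grad = w * (noise_pred - noise)

    # inject the gradient directly; no scalar SDS loss exists
    latents.backward(gradient=grad, retain_graph=True)
    return 0  # dummy loss value
```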
readme.md CHANGED
@@ -39,14 +39,15 @@ cd stable-dreamfusion
 ```bash
 pip install -r requirements.txt
 
+# (optional) install nvdiffrast for exporting textured mesh (--save_mesh)
+pip install git+https://github.com/NVlabs/nvdiffrast/
+
 # (optional) install the tcnn backbone if using --tcnn
 pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
 
 # (optional) install CLIP guidance for the dreamfield setting
 pip install git+https://github.com/openai/CLIP.git
 
-# (optional) install nvdiffrast for exporting textured mesh
-pip install git+https://github.com/NVlabs/nvdiffrast/
 ```
 
 ### Build extension (optional)
@@ -108,6 +109,13 @@ latents.backward(gradient=grad, retain_graph=True)
 * The generation seems quite sensitive to regularizations on weights_sum (alphas for each ray). The original opacity loss tends to make NeRF disappear (zero density everywhere), so we use an entropy loss to replace it for now (encourages alpha to be either 0 or 1).
 * NeRF Rendering core function: `./nerf/renderer.py > NeRFRenderer > run_cuda`.
 * the occupancy grid based training acceleration (instant-ngp like, enabled by `--cuda_ray`) may harm the generation progress, since once a grid cell is marked as empty, rays won't pass it later...
+* Not using `--cuda_ray` also works now:
+    ```bash
+    # `-O2` equals `--fp16 --dir_text`
+    python main.py --text "a hamburger" --workspace trial -O2 # faster training, but slower rendering
+    ```
+    Training is faster if only 128 points are sampled uniformly per ray (5h --> 2.5h).
+    More testing is needed...
 * Shading & normal evaluation: `./nerf/network*.py > NeRFNetwork > forward`. Current implementation harms training and is disabled.
 * use `--albedo_iters 1000` to enable random shading mode (chosen from albedo, lambertian, and textureless) after 1000 steps.
 * light direction: the current implementation uses a plane light source, instead of a point light source...
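On the `-O2` flag used in the new readme snippet: the comment says it expands to `--fp16 --dir_text`. A hypothetical sketch of how such a shortcut can be expanded after parsing (the actual logic lives in main.py and may differ):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--fp16', action='store_true')
parser.add_argument('--dir_text', action='store_true')
parser.add_argument('-O2', action='store_true', help="shortcut that implies --fp16 --dir_text")
opt = parser.parse_args()

if opt.O2:  # expand the shortcut into the flags it implies
    opt.fp16 = True
    opt.dir_text = True
```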