ashawkey committed
Commit 6875ba9 · Parent: a9e51b3
Files changed (4)
  1. main.py +1 -1
  2. nerf/renderer.py +1 -1
  3. nerf/sd.py +5 -3
  4. readme.md +10 -2
main.py CHANGED
@@ -28,7 +28,7 @@ if __name__ == '__main__':
     parser.add_argument('--ckpt', type=str, default='latest')
     parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
     parser.add_argument('--max_steps', type=int, default=1024, help="max num steps sampled per ray (only valid when using --cuda_ray)")
-    parser.add_argument('--num_steps', type=int, default=256, help="num steps sampled per ray (only valid when not using --cuda_ray)")
+    parser.add_argument('--num_steps', type=int, default=128, help="num steps sampled per ray (only valid when not using --cuda_ray)")
     parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when not using --cuda_ray)")
     parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only valid when using --cuda_ray)")
     parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when not using --cuda_ray)")
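For context on the default change: `--num_steps` is the number of uniform samples taken along each ray on the PyTorch (non-`--cuda_ray`) path, so halving it from 256 to 128 halves the MLP queries per ray. A minimal sketch of that kind of uniform sampling; the function and variable names (`sample_uniform`, `rays_o`, `rays_d`, `near`, `far`) are illustrative, not the repo's exact code:

```python
import torch

def sample_uniform(rays_o, rays_d, near, far, num_steps=128):
    # evenly spaced depths in [near, far], shared across all rays: [num_steps]
    z_vals = torch.linspace(0.0, 1.0, num_steps, device=rays_o.device)
    z_vals = near + (far - near) * z_vals
    # sample positions along each ray: [N, num_steps, 3]
    pts = rays_o.unsqueeze(1) + rays_d.unsqueeze(1) * z_vals.view(1, -1, 1)
    return pts, z_vals
```

Since the per-step cost is one network query per sample, the 5h --> 2.5h speedup quoted in the readme below is consistent with this linear cost in `num_steps`.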
nerf/renderer.py CHANGED
@@ -271,7 +271,7 @@ class NeRFRenderer(nn.Module):
 
         print(f'[INFO] writing obj mesh to {obj_file}')
         with open(obj_file, "w") as fp:
-            fp.write(f'mtllib {name}.mtl \n')
+            fp.write(f'mtllib {name}mesh.mtl \n')
 
         print(f'[INFO] writing vertices {v_np.shape}')
         for v in v_np:
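A note on why this one-line change matters: the `mtllib` statement inside an OBJ file must name the `.mtl` file that is actually written next to it, otherwise viewers load the geometry but silently drop the material. A hedged sketch of the pairing, with illustrative filenames (not the repo's exact output paths):

```python
# minimal OBJ/MTL pair; 'mesh' is an illustrative base name
with open('mesh.mtl', 'w') as fp:
    fp.write('newmtl mat0\n')        # material name referenced by the OBJ
    fp.write('map_Kd albedo.png\n')  # diffuse texture map

with open('mesh.obj', 'w') as fp:
    fp.write('mtllib mesh.mtl\n')    # must match the .mtl filename on disk
    fp.write('usemtl mat0\n')        # must match the newmtl name above
    fp.write('v 0 0 0\nv 1 0 0\nv 0 1 0\n')
    fp.write('f 1 2 3\n')
```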
nerf/sd.py CHANGED
@@ -41,6 +41,7 @@ class StableDiffusion(nn.Module):
 
         # 4. Create a scheduler for inference
         self.scheduler = PNDMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=self.num_train_timesteps)
+        self.alphas = self.scheduler.alphas_cumprod.to(self.device) # for convenience
 
         print(f'[INFO] loaded stable diffusion!')
 
@@ -93,8 +94,9 @@ class StableDiffusion(nn.Module):
         noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
         noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
 
-        # w(t), one_minus_alpha_prod, i.e., sigma^2
-        w = (1 - self.scheduler.alphas_cumprod[t]).to(self.device)
+        # w(t), alpha_t * sigma_t^2
+        # w = (1 - self.alphas[t])
+        w = self.alphas[t] ** 0.5 * (1 - self.alphas[t])
         grad = w * (noise_pred - noise)
 
         # clip grad for stable training?
@@ -105,7 +107,7 @@
         latents.backward(gradient=grad, retain_graph=True)
         # torch.cuda.synchronize(); print(f'[TIME] guiding: backward {time.time() - _t:.4f}s')
 
-        return 0 # fake loss value
+        return 0 # dummy loss value
 
     def produce_latents(self, text_embeddings, height=512, width=512, num_inference_steps=50, guidance_scale=7.5, latents=None):
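For reference, the weighting touched here is the w(t) in the score distillation sampling (SDS) gradient, grad_theta L = w(t) * (eps_hat(z_t; y, t) - eps) * dz/dtheta. In the alpha_t/sigma_t notation where z_t = alpha_t * z + sigma_t * eps (so alpha_t = sqrt(alphas_cumprod[t])), the new weight is w(t) = alpha_t * sigma_t^2 = sqrt(alphas_cumprod[t]) * (1 - alphas_cumprod[t]), which is exactly what the code computes. Because the gradient is injected directly via `latents.backward(gradient=grad)`, there is no scalar loss to return, hence the dummy value. A condensed, hedged sketch of the surrounding logic (the timestep range and the standalone `sds_step` function are schematic, not the repo's exact `train_step`):

```python
import torch

def sds_step(unet, scheduler, alphas, latents, text_embeddings, guidance_scale):
    # sample a timestep away from the extremes (schematic range)
    t = torch.randint(20, 980, (1,), device=latents.device)
    noise = torch.randn_like(latents)
    latents_noisy = scheduler.add_noise(latents, noise, t)

    # classifier-free guidance: uncond + text-conditioned passes in one batch
    latent_input = torch.cat([latents_noisy] * 2)
    noise_pred = unet(latent_input, t, encoder_hidden_states=text_embeddings).sample
    noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

    # w(t) = alpha_t * sigma_t^2 = sqrt(alphas_cumprod) * (1 - alphas_cumprod)
    w = alphas[t] ** 0.5 * (1 - alphas[t])
    grad = w * (noise_pred - noise)

    # inject the gradient directly; no scalar SDS loss exists
    latents.backward(gradient=grad, retain_graph=True)
    return 0  # dummy loss value
```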
readme.md CHANGED
@@ -39,14 +39,15 @@ cd stable-dreamfusion
 ```bash
 pip install -r requirements.txt
 
+# (optional) install nvdiffrast for exporting textured mesh (--save_mesh)
+pip install git+https://github.com/NVlabs/nvdiffrast/
+
 # (optional) install the tcnn backbone if using --tcnn
 pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
 
 # (optional) install CLIP guidance for the dreamfield setting
 pip install git+https://github.com/openai/CLIP.git
 
-# (optional) install nvdiffrast for exporting textured mesh
-pip install git+https://github.com/NVlabs/nvdiffrast/
 ```
 
 ### Build extension (optional)
@@ -108,6 +109,13 @@ latents.backward(gradient=grad, retain_graph=True)
 * The generation seems quite sensitive to regularizations on weights_sum (alphas for each ray). The original opacity loss tends to make NeRF disappear (zero density everywhere), so we use an entropy loss to replace it for now (encourages alpha to be either 0 or 1).
 * NeRF Rendering core function: `./nerf/renderer.py > NeRFRenderer > run_cuda`.
 * the occupancy grid based training acceleration (instant-ngp like, enabled by `--cuda_ray`) may harm the generation progress, since once a grid cell is marked as empty, rays won't pass it later...
+* Not using `--cuda_ray` also works now:
+    ```bash
+    # `-O2` equals `--fp16 --dir_text`
+    python main.py --text "a hamburger" --workspace trial -O2 # faster training, but slower rendering
+    ```
+    Training is faster if only 128 points are sampled uniformly per ray (5h --> 2.5h).
+    More testing is needed...
 * Shading & normal evaluation: `./nerf/network*.py > NeRFNetwork > forward`. Current implementation harms training and is disabled.
 * use `--albedo_iters 1000` to enable random shading mode (chosen from albedo, lambertian, and textureless) after 1000 steps.
 * light direction: the current implementation uses a plane light source, instead of a point light source...
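On the `-O2` flag used in the new readme snippet: the comment says it expands to `--fp16 --dir_text`. A hypothetical sketch of how such a shortcut can be expanded after parsing (the actual logic lives in main.py and may differ):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--fp16', action='store_true')
parser.add_argument('--dir_text', action='store_true')
parser.add_argument('-O2', action='store_true', help="shortcut that implies --fp16 --dir_text")
opt = parser.parse_args()

if opt.O2:  # expand the shortcut into the flags it implies
    opt.fp16 = True
    opt.dir_text = True
```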