enable random lambertian shading in training
Browse files
- assets/update_logs.md +4 -0
- main.py +5 -5
- nerf/provider.py +1 -1
- readme.md +14 -5
assets/update_logs.md
CHANGED
@@ -1,3 +1,7 @@
+### 2022.10.9
+* The shading (partially) starts to work, at least it won't make the scene empty. For some prompts, it shows better results (less severe Janus problem). The textureless rendering mode is still disabled.
+* Enable shading by default (--albedo_iters 1000).
+
 ### 2022.10.5
 * Basic reproduction finished.
 * Non --cuda_ray, --tcnn are not working, need to fix.
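For context on what `--albedo_iters` gates: during the first `albedo_iters` steps only plain albedo is rendered, and afterwards each training step draws a shading mode at random (textureless stays disabled per the log above). A minimal sketch of the idea, with illustrative probability and ambient-ratio values, not the repository's exact code:

```python
import random

def choose_shading(global_step, albedo_iters=1000):
    """Pick the shading mode and ambient ratio for one training step.

    Warm up with plain albedo for `albedo_iters` steps, then sample a mode
    at random; 'textureless' is left out since the log says it is disabled.
    The 0.5 probability and 0.1 ambient ratio are illustrative values only.
    """
    if global_step < albedo_iters:
        return 'albedo', 1.0            # color = albedo, no lighting term
    if random.random() < 0.5:
        return 'albedo', 1.0
    return 'lambertian', 0.1            # mostly diffuse: small ambient + lambertian term
```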
main.py
CHANGED
@@ -32,7 +32,7 @@ if __name__ == '__main__':
     parser.add_argument('--upsample_steps', type=int, default=64, help="num steps up-sampled per ray (only valid when not using --cuda_ray)")
     parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only valid when using --cuda_ray)")
     parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when not using --cuda_ray)")
-    parser.add_argument('--albedo_iters', type=int, default=
+    parser.add_argument('--albedo_iters', type=int, default=1000, help="training iters that only use albedo shading")
     # model options
     parser.add_argument('--bg_radius', type=float, default=1.4, help="if positive, use a background model at sphere(bg_radius)")
     parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied")
@@ -75,14 +75,14 @@ if __name__ == '__main__':
         opt.dir_text = True
         # use occupancy grid to prune ray sampling, faster rendering.
         opt.cuda_ray = True
-        opt.lambda_entropy = 1e-4
-        opt.lambda_opacity = 0
+        # opt.lambda_entropy = 1e-4
+        # opt.lambda_opacity = 0
 
     elif opt.O2:
         opt.fp16 = True
         opt.dir_text = True
-        opt.lambda_entropy = 1e-
-        opt.lambda_opacity =
+        opt.lambda_entropy = 1e-4 # necessary to keep non-empty
+        opt.lambda_opacity = 3e-3 # no occupancy grid, so use a stronger opacity loss.
 
     if opt.backbone == 'vanilla':
         from nerf.network import NeRFNetwork
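For reference, `--lambda_entropy` and `--lambda_opacity` weight regularizers on the accumulated per-ray opacity: a binary-entropy term that pushes each ray towards fully empty or fully occupied, and a plain opacity penalty that discourages filling space with density. A rough sketch of how such terms are commonly formed (illustrative, not the exact losses in this repo):

```python
import torch

def opacity_regularizers(weights_sum, lambda_entropy=1e-4, lambda_opacity=3e-3):
    """weights_sum: accumulated opacity per ray, values in [0, 1], shape [N]."""
    alphas = weights_sum.clamp(1e-5, 1 - 1e-5)
    # binary entropy: small when a ray is confidently empty or confidently occupied
    loss_entropy = (- alphas * torch.log2(alphas)
                    - (1 - alphas) * torch.log2(1 - alphas)).mean()
    # plain opacity penalty: discourages filling empty space
    loss_opacity = (weights_sum ** 2).mean()
    return lambda_entropy * loss_entropy + lambda_opacity * loss_opacity
```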
nerf/provider.py
CHANGED
@@ -55,7 +55,7 @@ def get_view_direction(thetas, phis, overhead, front):
     return res
 
 
-def rand_poses(size, device, radius_range=[1, 1.5], theta_range=[0,
+def rand_poses(size, device, radius_range=[1, 1.5], theta_range=[0, 100], phi_range=[0, 360], return_dirs=False, angle_overhead=30, angle_front=60, jitter=False):
     ''' generate random poses from an orbit camera
     Args:
         size: batch size of generated poses.
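`rand_poses` samples cameras on an orbit around the origin: a radius from `radius_range`, a polar angle from `theta_range` and an azimuth from `phi_range` (degrees), then builds a look-at pose facing the origin; the extra arguments (`return_dirs`, `angle_overhead`, `angle_front`, `jitter`) additionally tag each pose with a view-direction label for prompting and perturb it. A simplified single-pose sketch under a y-up, look-down-minus-z convention (an assumption, not necessarily the repository's exact convention):

```python
import numpy as np

def sample_orbit_pose(radius_range=(1.0, 1.5), theta_range=(0.0, 100.0), phi_range=(0.0, 360.0)):
    """Sample one camera-to-world pose (4x4) on an orbit around the origin; angles in degrees."""
    radius = np.random.uniform(*radius_range)
    theta = np.deg2rad(np.random.uniform(max(theta_range[0], 1.0), theta_range[1]))  # polar angle from +y, kept off the pole
    phi = np.deg2rad(np.random.uniform(*phi_range))                                  # azimuth around +y

    # camera center on the sphere (y-up)
    center = radius * np.array([np.sin(theta) * np.sin(phi),
                                np.cos(theta),
                                np.sin(theta) * np.cos(phi)])

    # look-at frame: the camera looks along -forward, i.e. towards the origin
    forward = center / np.linalg.norm(center)
    right = np.cross(np.array([0.0, 1.0, 0.0]), forward)
    right = right / np.linalg.norm(right)
    up = np.cross(forward, right)

    pose = np.eye(4)
    pose[:3, :3] = np.stack([right, up, forward], axis=1)
    pose[:3, 3] = center
    return pose
```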
readme.md
CHANGED
@@ -73,14 +73,24 @@ First time running will take some time to compile the CUDA extensions.
 
 ```bash
 ### stable-dreamfusion setting
-## train with text prompt
+## train with text prompt (with the default settings)
 # `-O` equals `--cuda_ray --fp16 --dir_text`
+# `--cuda_ray` enables instant-ngp-like occupancy grid based acceleration.
+# `--fp16` enables half-precision training.
+# `--dir_text` enables view-dependent prompting.
 python main.py --text "a hamburger" --workspace trial -O
 
+# if the above command fails to generate things (learns an empty scene), maybe try:
+# 1. disable random lambertian shading, simply use albedo as color:
+python main.py --text "a hamburger" --workspace trial -O --albedo_iters 15000 # i.e., set --albedo_iters >= --iters, which defaults to 15000
+# 2. use a smaller density regularization weight:
+python main.py --text "a hamburger" --workspace trial -O --lambda_entropy 1e-5
+
 ## after the training is finished:
-# test (exporting 360 video
+# test (exporting 360 video)
 python main.py --workspace trial -O --test
-
+# also save a mesh (with obj, mtl, and png texture)
+python main.py --workspace trial -O --test --save_mesh
 # test with a GUI (free view control!)
 python main.py --workspace trial -O --test --gui
 
@@ -103,7 +113,7 @@ pred_rgb_512 = F.interpolate(pred_rgb, (512, 512), mode='bilinear', align_corner
 latents = self.encode_imgs(pred_rgb_512)
 ... # timestep sampling, noise adding and UNet noise predicting
 # 3. the SDS loss, since the UNet part is ignored and cannot simply autodiff, we manually set the grad for latents.
-w = (1 - self.
+w = self.alphas[t] ** 0.5 * (1 - self.alphas[t])
 grad = w * (noise_pred - noise)
 latents.backward(gradient=grad, retain_graph=True)
 ```
@@ -119,7 +129,6 @@ latents.backward(gradient=grad, retain_graph=True)
 Training is faster if we only sample 128 points uniformly per ray (5h --> 2.5h).
 More testing is needed...
 * Shading & normal evaluation: `./nerf/network*.py > NeRFNetwork > forward`. Current implementation harms training and is disabled.
-* use `--albedo_iters 1000` to enable random shading mode after 1000 steps from albedo, lambertian, and textureless.
 * light direction: current implementation uses a plane light source, instead of a point light source...
 * View-dependent prompting: `./nerf/provider.py > get_view_direction`.
 * use `--angle_overhead, --angle_front` to set the border. How to better divide front/back/side regions?
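A note on the `w = self.alphas[t] ** 0.5 * (1 - self.alphas[t])` change in the readme snippet: because the UNet's Jacobian is skipped, the SDS gradient is injected by hand with `backward(gradient=...)`. The sketch below (a hypothetical helper, not the repository's code) shows that call plus an equivalent surrogate-loss formulation that produces the same gradient for the latents:

```python
import torch
import torch.nn.functional as F

def sds_backward(latents, noise_pred, noise, w, use_surrogate=False):
    """Inject the SDS gradient w * (noise_pred - noise) into `latents`."""
    grad = (w * (noise_pred - noise)).detach()
    if not use_surrogate:
        # set d(loss)/d(latents) directly, as in the readme snippet
        latents.backward(gradient=grad, retain_graph=True)
    else:
        # equivalent surrogate: 0.5 * ||latents - (latents - grad)||^2 has gradient `grad`
        target = (latents - grad).detach()
        loss = 0.5 * F.mse_loss(latents, target, reduction='sum')
        loss.backward(retain_graph=True)
```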