Skip to content

Commit 0dff7bd

Browse files
Supports Neuralangelo, bakedangelo, neus-facto-angelo (autonomousvision#134)
* empty cache for eval * load dtu highres images * add adamW * add multi-step with warmup lr scheduler * add background model for bakedsdf * make bakedsdf's beta anneal configurable * add accumulate_grad_steps * add numerical gradients * make hash encoding configurable * make frequency position encoding optional * add mask for hash encoding * add neuralangelo and config * add bakedangelo * fix typo * update default config of neuralangelo & bakedangelo * fix multi-gpu leak * disable nuscenes data parser * shared proposal network for background model * use all images + return transform and scale * zip-nerf interlevel loss * change to inerlevel_loss_zip * use proposal network for background model * disable uniform_sampler * add beta annealing to neus_factor * add neus-facto-angelo * update default config for bakedangelo * fix typo * update default config of neus-facto-angelo * updates
1 parent 698f900 commit 0dff7bd

22 files changed

+1059
-102
lines changed

README.md

+7
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,13 @@
1010
SDFStudio is a unified and modular framework for neural implicit surface reconstruction, built on top of the awesome nerfstudio project. We provide a unified implementation of three major implicit surface reconstruction methods: UniSurf, VolSDF, and NeuS. SDFStudio also supports various scene representations, such as MLPs, Tri-plane, and Multi-res. feature grids, and multiple point sampling strategies such as surface-guided sampling as in UniSurf, and Voxel-surface guided sampling from NeuralReconW. It further integrates recent advances in the area such as the utilization of monocular cues (MonoSDF), geometry regularization (UniSurf) and multi-view consistency (Geo-NeuS). Thanks to the unified and modular implementation, SDFStudio makes it easy to transfer ideas from one method to another. For example, Mono-NeuS applies the idea from MonoSDF to NeuS, and Geo-VolSDF applies the idea from Geo-NeuS to VolSDF.
1111

1212
# Updates
13+
14+
**2023.06.16**: Add `bakedangelo` which combines `BakedSDF` with numerical gradients and progressive training of `Neuralangelo`.
15+
16+
**2023.06.16**: Add `neus-facto-angelo` which combines `neus-facto` with numerical gradients and progressive training of `Neuralangelo`.
17+
18+
**2023.06.16**: Support [Neuralangelo](https://research.nvidia.com/labs/dir/neuralangelo/).
19+
1320
**2023.03.12**: Support [BakedSDF](https://bakedsdf.github.io/).
1421

1522
**2022.12.28**: Support [Neural RGB-D Surface Reconstruction](https://dazinovic.github.io/neural-rgbd-surface-reconstruction/).

nerfstudio/cameras/rays.py

+15
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,21 @@ class RaySamples(TensorDataclass):
128128
times: Optional[TensorType[..., 1]] = None
129129
"""Times at which rays are sampled"""
130130

131+
def get_alphas(self, densities: TensorType[..., "num_samples", 1]) -> TensorType[..., "num_samples", 1]:
132+
"""Return weights based on predicted densities
133+
134+
Args:
135+
densities: Predicted densities for samples along ray
136+
137+
Returns:
138+
Alphas for each sample
139+
"""
140+
141+
delta_density = self.deltas * densities
142+
alphas = 1 - torch.exp(-delta_density)
143+
144+
return alphas
145+
131146
def get_weights(self, densities: TensorType[..., "num_samples", 1]) -> TensorType[..., "num_samples", 1]:
132147
"""Return weights based on predicted densities
133148

nerfstudio/configs/base_config.py

+2
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ class TrainerConfig(PrintableConfig):
161161
"""Optionally specify model config to load from; if none, will use the default config?"""
162162
load_scheduler: bool = True
163163
"""Whether to load the lr scheduler state_dict if exists"""
164+
accumulate_grad_steps: int = 1
165+
"""Number of gradient steps to accumulate before taking an optimizer step."""
164166

165167

166168
# Viewer related configs

nerfstudio/configs/method_configs.py

+214-1
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,12 @@
4545
PhototourismDataParserConfig,
4646
)
4747
from nerfstudio.data.dataparsers.sdfstudio_dataparser import SDFStudioDataParserConfig
48-
from nerfstudio.engine.optimizers import AdamOptimizerConfig, RAdamOptimizerConfig
48+
from nerfstudio.engine.optimizers import AdamOptimizerConfig, RAdamOptimizerConfig, AdamWOptimizerConfig
4949
from nerfstudio.engine.schedulers import (
5050
ExponentialSchedulerConfig,
5151
MultiStepSchedulerConfig,
5252
NeuSSchedulerConfig,
53+
MultiStepWarmupSchedulerConfig,
5354
)
5455
from nerfstudio.field_components.temporal_distortions import TemporalDistortionKind
5556
from nerfstudio.fields.sdf_field import SDFFieldConfig
@@ -58,6 +59,8 @@
5859
from nerfstudio.models.instant_ngp import InstantNGPModelConfig
5960
from nerfstudio.models.mipnerf import MipNerfModel
6061
from nerfstudio.models.nerfacto import NerfactoModelConfig
62+
from nerfstudio.models.neuralangelo import NeuralangeloModelConfig
63+
from nerfstudio.models.bakedangelo import BakedAngeloModelConfig
6164
from nerfstudio.models.neuralreconW import NeuralReconWModelConfig
6265
from nerfstudio.models.neus import NeuSModelConfig
6366
from nerfstudio.models.neus_acc import NeuSAccModelConfig
@@ -99,8 +102,147 @@
99102
"neus-facto-bigmlp": "NeuS-facto with big MLP, it is used in training heritage data with 8 gpus",
100103
"bakedsdf": "Implementation of BackedSDF with multi-res hash grids",
101104
"bakedsdf-mlp": "Implementation of BackedSDF with large MLPs",
105+
"neuralangelo": "Implementation of Neuralangelo",
106+
"bakedangelo": "Implementation of Neuralangelo with BakedSDF",
107+
"neus-facto-angelo": "Implementation of Neuralangelo with neus-facto",
102108
}
103109

110+
111+
method_configs["bakedangelo"] = Config(
112+
method_name="bakedangelo",
113+
trainer=TrainerConfig(
114+
steps_per_eval_image=5000,
115+
steps_per_eval_batch=5000,
116+
steps_per_save=20000,
117+
steps_per_eval_all_images=1000000, # set to a very large model so we don't eval with all images
118+
max_num_iterations=1000_001,
119+
mixed_precision=False,
120+
),
121+
pipeline=VanillaPipelineConfig(
122+
datamanager=VanillaDataManagerConfig(
123+
dataparser=SDFStudioDataParserConfig(),
124+
train_num_rays_per_batch=8192,
125+
eval_num_rays_per_batch=1024,
126+
camera_optimizer=CameraOptimizerConfig(
127+
mode="off", optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2)
128+
),
129+
),
130+
model=BakedAngeloModelConfig(
131+
near_plane=0.01,
132+
far_plane=1000.0,
133+
overwrite_near_far_plane=True,
134+
sdf_field=SDFFieldConfig(
135+
use_grid_feature=True,
136+
num_layers=1,
137+
num_layers_color=4,
138+
hidden_dim=256,
139+
hidden_dim_color=256,
140+
geometric_init=True,
141+
bias=1.5,
142+
beta_init=0.1,
143+
inside_outside=True,
144+
use_appearance_embedding=True,
145+
use_numerical_gradients=True,
146+
base_res=64,
147+
max_res=4096,
148+
log2_hashmap_size=22,
149+
hash_features_per_level=8,
150+
hash_smoothstep=False,
151+
use_position_encoding=False,
152+
),
153+
eikonal_loss_mult=0.01,
154+
background_model="grid",
155+
proposal_weights_anneal_max_num_iters=10000,
156+
use_anneal_beta=True,
157+
eval_num_rays_per_chunk=1024,
158+
use_spatial_varying_eikonal_loss=False,
159+
steps_per_level=10_000,
160+
curvature_loss_warmup_steps=20_000,
161+
beta_anneal_end=0.0002,
162+
beta_anneal_max_num_iters=1000_000,
163+
),
164+
),
165+
optimizers={
166+
"proposal_networks": {
167+
"optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15),
168+
"scheduler": MultiStepSchedulerConfig(max_steps=1000_000),
169+
},
170+
"fields": {
171+
"optimizer": AdamWOptimizerConfig(lr=1e-3, eps=1e-15, weight_decay=1e-2),
172+
"scheduler": MultiStepWarmupSchedulerConfig(warm_up_end=5000, milestones=[600_000, 800_000], gamma=0.1),
173+
},
174+
"field_background": {
175+
"optimizer": AdamWOptimizerConfig(lr=1e-3, eps=1e-15),
176+
"scheduler": MultiStepWarmupSchedulerConfig(warm_up_end=5000, milestones=[300_000, 400_000], gamma=0.1),
177+
},
178+
},
179+
viewer=ViewerConfig(num_rays_per_chunk=1 << 15),
180+
vis="viewer",
181+
)
182+
183+
184+
method_configs["neuralangelo"] = Config(
185+
method_name="neuralangelo",
186+
trainer=TrainerConfig(
187+
steps_per_eval_image=5000,
188+
steps_per_eval_batch=5000,
189+
steps_per_save=20000,
190+
steps_per_eval_all_images=1000000, # set to a very large model so we don't eval with all images
191+
max_num_iterations=500_001,
192+
mixed_precision=False,
193+
),
194+
pipeline=VanillaPipelineConfig(
195+
datamanager=VanillaDataManagerConfig(
196+
dataparser=SDFStudioDataParserConfig(),
197+
train_num_rays_per_batch=512,
198+
eval_num_rays_per_batch=512,
199+
camera_optimizer=CameraOptimizerConfig(
200+
mode="off", optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2)
201+
),
202+
),
203+
model=NeuralangeloModelConfig(
204+
sdf_field=SDFFieldConfig(
205+
use_grid_feature=True,
206+
num_layers=1,
207+
num_layers_color=4,
208+
hidden_dim=256,
209+
hidden_dim_color=256,
210+
geometric_init=True,
211+
bias=0.5,
212+
beta_init=0.3,
213+
inside_outside=False,
214+
use_appearance_embedding=False,
215+
position_encoding_max_degree=6,
216+
use_numerical_gradients=True,
217+
base_res=64,
218+
max_res=4096,
219+
log2_hashmap_size=22,
220+
hash_features_per_level=8,
221+
hash_smoothstep=False,
222+
use_position_encoding=False,
223+
),
224+
background_model="mlp",
225+
enable_progressive_hash_encoding=True,
226+
enable_curvature_loss_schedule=True,
227+
enable_numerical_gradients_schedule=True,
228+
),
229+
),
230+
optimizers={
231+
"fields": {
232+
"optimizer": AdamWOptimizerConfig(lr=1e-3, weight_decay=0.01, eps=1e-15),
233+
# "scheduler": NeuSSchedulerConfig(warm_up_end=5000, learning_rate_alpha=0.05, max_steps=500000),
234+
"scheduler": MultiStepWarmupSchedulerConfig(warm_up_end=5000, milestones=[300_000, 400_000], gamma=0.1),
235+
},
236+
"field_background": {
237+
"optimizer": AdamWOptimizerConfig(lr=1e-3, eps=1e-15),
238+
"scheduler": MultiStepWarmupSchedulerConfig(warm_up_end=5000, milestones=[300_000, 400_000], gamma=0.1),
239+
},
240+
},
241+
viewer=ViewerConfig(num_rays_per_chunk=1 << 15),
242+
vis="viewer",
243+
)
244+
245+
104246
method_configs["bakedsdf"] = Config(
105247
method_name="bakedsdf",
106248
trainer=TrainerConfig(
@@ -228,6 +370,77 @@
228370
)
229371

230372

373+
method_configs["neus-facto-angelo"] = Config(
374+
method_name="neus-facto-angelo",
375+
trainer=TrainerConfig(
376+
steps_per_eval_image=5000,
377+
steps_per_eval_batch=5000,
378+
steps_per_save=20000,
379+
steps_per_eval_all_images=1000000, # set to a very large model so we don't eval with all images
380+
max_num_iterations=1000_001,
381+
mixed_precision=False,
382+
),
383+
pipeline=VanillaPipelineConfig(
384+
datamanager=VanillaDataManagerConfig(
385+
dataparser=SDFStudioDataParserConfig(),
386+
train_num_rays_per_batch=2048,
387+
eval_num_rays_per_batch=1024,
388+
camera_optimizer=CameraOptimizerConfig(
389+
mode="off", optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2)
390+
),
391+
),
392+
model=NeuSFactoModelConfig(
393+
near_plane=0.01,
394+
far_plane=1000.0,
395+
overwrite_near_far_plane=True,
396+
sdf_field=SDFFieldConfig(
397+
use_grid_feature=True,
398+
num_layers=1,
399+
num_layers_color=4,
400+
hidden_dim=256,
401+
hidden_dim_color=256,
402+
geometric_init=True,
403+
bias=0.5,
404+
beta_init=0.3,
405+
inside_outside=False,
406+
use_appearance_embedding=True,
407+
use_numerical_gradients=True,
408+
base_res=64,
409+
max_res=4096,
410+
log2_hashmap_size=22,
411+
hash_features_per_level=8,
412+
hash_smoothstep=False,
413+
use_position_encoding=False,
414+
),
415+
background_model="grid",
416+
eval_num_rays_per_chunk=1024,
417+
level_init=8,
418+
eikonal_loss_mult=0.01,
419+
use_anneal_beta=True,
420+
enable_progressive_hash_encoding=True,
421+
enable_numerical_gradients_schedule=True,
422+
enable_curvature_loss_schedule=True,
423+
curvature_loss_multi=5e-4,
424+
),
425+
),
426+
optimizers={
427+
"proposal_networks": {
428+
"optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15),
429+
"scheduler": MultiStepSchedulerConfig(max_steps=1000_000),
430+
},
431+
"fields": {
432+
"optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15),
433+
"scheduler": MultiStepWarmupSchedulerConfig(warm_up_end=5000, milestones=[600_000, 800_000], gamma=0.1),
434+
},
435+
"field_background": {
436+
"optimizer": AdamWOptimizerConfig(lr=1e-3, eps=1e-15),
437+
"scheduler": MultiStepWarmupSchedulerConfig(warm_up_end=5000, milestones=[300_000, 400_000], gamma=0.1),
438+
},
439+
},
440+
viewer=ViewerConfig(num_rays_per_chunk=1 << 15),
441+
vis="viewer",
442+
)
443+
231444
method_configs["neus-facto"] = Config(
232445
method_name="neus-facto",
233446
trainer=TrainerConfig(

nerfstudio/data/datamanagers/base_datamanager.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
from nerfstudio.data.dataparsers.mipnerf360_dataparser import Mipnerf360DataParserConfig
4646
from nerfstudio.data.dataparsers.monosdf_dataparser import MonoSDFDataParserConfig
4747
from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig
48-
from nerfstudio.data.dataparsers.nuscenes_dataparser import NuScenesDataParserConfig
48+
# from nerfstudio.data.dataparsers.nuscenes_dataparser import NuScenesDataParserConfig
4949
from nerfstudio.data.dataparsers.phototourism_dataparser import (
5050
PhototourismDataParserConfig,
5151
)
@@ -74,7 +74,7 @@
7474
"blender-data": BlenderDataParserConfig(),
7575
"friends-data": FriendsDataParserConfig(),
7676
"instant-ngp-data": InstantNGPDataParserConfig(),
77-
"nuscenes-data": NuScenesDataParserConfig(),
77+
# "nuscenes-data": NuScenesDataParserConfig(),
7878
"record3d-data": Record3DDataParserConfig(),
7979
"dnerf-data": DNeRFDataParserConfig(),
8080
"phototourism-data": PhototourismDataParserConfig(),

nerfstudio/data/dataparsers/nerfstudio_dataparser.py

+19-3
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ class NerfstudioDataParserConfig(DataParserConfig):
6262
"""Whether to automatically scale the poses to fit in +/- 1 bounding box."""
6363
train_split_percentage: float = 0.9
6464
"""The percent of images to use for training. The remaining images are for eval."""
65+
use_all_train_images: bool = False
66+
"""Whether to use all images for training. If True, all images are used for training."""
6567

6668

6769
@dataclass
@@ -169,6 +171,9 @@ def _generate_dataparser_outputs(self, split="train"):
169171
assert len(i_eval) == num_eval_images
170172
if split == "train":
171173
indices = i_train
174+
if self.config.use_all_train_images:
175+
indices = i_all
176+
num_train_images = num_images
172177
elif split in ["val", "test"]:
173178
indices = i_eval
174179
else:
@@ -181,7 +186,7 @@ def _generate_dataparser_outputs(self, split="train"):
181186
orientation_method = self.config.orientation_method
182187

183188
poses = torch.from_numpy(np.array(poses).astype(np.float32))
184-
poses, _ = camera_utils.auto_orient_and_center_poses(
189+
poses, transform_matrix = camera_utils.auto_orient_and_center_poses(
185190
poses,
186191
method=orientation_method,
187192
center_poses=self.config.center_poses,
@@ -190,9 +195,10 @@ def _generate_dataparser_outputs(self, split="train"):
190195
# Scale poses
191196
scale_factor = 1.0
192197
if self.config.auto_scale_poses:
193-
scale_factor /= torch.max(torch.abs(poses[:, :3, 3]))
198+
scale_factor /= float(torch.max(torch.abs(poses[:, :3, 3])))
199+
scale_factor *= self.config.scale_factor
194200

195-
poses[:, :3, 3] *= scale_factor * self.config.scale_factor
201+
poses[:, :3, 3] *= scale_factor
196202

197203
# Choose image_filenames and poses based on split, but after auto orient and scaling the poses.
198204
image_filenames = [image_filenames[i] for i in indices]
@@ -247,11 +253,21 @@ def _generate_dataparser_outputs(self, split="train"):
247253
assert self.downscale_factor is not None
248254
cameras.rescale_output_resolution(scaling_factor=1.0 / self.downscale_factor)
249255

256+
if "applied_transform" in meta:
257+
applied_transform = torch.tensor(meta["applied_transform"], dtype=transform_matrix.dtype)
258+
transform_matrix = transform_matrix @ torch.cat(
259+
[applied_transform, torch.tensor([[0, 0, 0, 1]], dtype=transform_matrix.dtype)], 0
260+
)
261+
if "applied_scale" in meta:
262+
applied_scale = float(meta["applied_scale"])
263+
scale_factor *= applied_scale
264+
250265
dataparser_outputs = DataparserOutputs(
251266
image_filenames=image_filenames,
252267
cameras=cameras,
253268
scene_box=scene_box,
254269
mask_filenames=mask_filenames if len(mask_filenames) > 0 else None,
270+
metadata={"transform": transform_matrix, "scale_factor": scale_factor},
255271
)
256272
return dataparser_outputs
257273

nerfstudio/data/dataparsers/sdfstudio_dataparser.py

+11
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,9 @@ class SDFStudioDataParserConfig(DataParserConfig):
167167
train_val_no_overlap: bool = False
168168
"""remove selected / sampled validation images from training set"""
169169
auto_orient: bool = False
170+
"""automatically orient the scene such that the up direction is the same as the viewer's up direction"""
171+
load_dtu_highres: bool = False
172+
"""load high resolution images from DTU dataset, should only be used for the preprocessed DTU dataset"""
170173

171174

172175
@dataclass
@@ -216,6 +219,14 @@ def _generate_dataparser_outputs(self, split="train"): # pylint: disable=unused
216219
cy.append(intrinsics[1, 2])
217220
camera_to_worlds.append(camtoworld)
218221

222+
# here is hard coded for DTU high-res images
223+
if self.config.load_dtu_highres:
224+
image_filename = self.config.data / "image" / frame["rgb_path"].replace("_rgb", "")
225+
intrinsics[:2, :] *= 1200 / 384.0
226+
intrinsics[0, 2] += 200
227+
height, width = 1200, 1600
228+
meta["height"], meta["width"] = height, width
229+
219230
if self.config.include_mono_prior:
220231
assert meta["has_mono_prior"]
221232
# load mono depth

0 commit comments

Comments
 (0)