Skip to content

Commit ad31b9d

Browse files
committed
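Avoid redundant computation due to auto+Eigen (an `auto` local bound to an Eigen expression stores the lazy expression rather than its value, so the expression is re-evaluated on every use; declaring the concrete vector/array type evaluates it once)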
1 parent 44080b8 commit ad31b9d

File tree

5 files changed

+26
-34
lines changed

5 files changed

+26
-34
lines changed

include/neural-graphics-primitives/common_device.cuh

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,10 @@ inline __host__ __device__ Eigen::Array3f linear_to_srgb_derivative(const Eigen:
7777

7878
template <uint32_t N_DIMS, typename T>
7979
__host__ __device__ Eigen::Matrix<float, N_DIMS, 1> read_image(const T* __restrict__ data, const Eigen::Vector2i& resolution, const Eigen::Vector2f& pos) {
80-
auto pos_float = Eigen::Vector2f{pos.x() * (float)(resolution.x()-1), pos.y() * (float)(resolution.y()-1)};
81-
Eigen::Vector2i texel = pos_float.cast<int>();
80+
const Eigen::Vector2f pos_float = Eigen::Vector2f{pos.x() * (float)(resolution.x()-1), pos.y() * (float)(resolution.y()-1)};
81+
const Eigen::Vector2i texel = pos_float.cast<int>();
8282

83-
auto weight = pos_float - texel.cast<float>();
83+
const Eigen::Vector2f weight = pos_float - texel.cast<float>();
8484

8585
auto read_val = [&](Eigen::Vector2i pos) {
8686
pos.x() = std::max(std::min(pos.x(), resolution.x()-1), 0);
@@ -100,22 +100,20 @@ __host__ __device__ Eigen::Matrix<float, N_DIMS, 1> read_image(const T* __restri
100100
return result;
101101
};
102102

103-
auto result = (
103+
return (
104104
(1 - weight.x()) * (1 - weight.y()) * read_val({texel.x(), texel.y()}) +
105105
(weight.x()) * (1 - weight.y()) * read_val({texel.x()+1, texel.y()}) +
106106
(1 - weight.x()) * (weight.y()) * read_val({texel.x(), texel.y()+1}) +
107107
(weight.x()) * (weight.y()) * read_val({texel.x()+1, texel.y()+1})
108108
);
109-
110-
return result;
111109
}
112110

113111
template <uint32_t N_DIMS, typename T>
114112
__device__ void deposit_image_gradient(const Eigen::Matrix<float, N_DIMS, 1>& value, T* __restrict__ gradient, T* __restrict__ gradient_weight, const Eigen::Vector2i& resolution, const Eigen::Vector2f& pos) {
115-
auto pos_float = Eigen::Vector2f{pos.x() * (resolution.x()-1), pos.y() * (resolution.y()-1)};
116-
Eigen::Vector2i texel = pos_float.cast<int>();
113+
const Eigen::Vector2f pos_float = Eigen::Vector2f{pos.x() * (resolution.x()-1), pos.y() * (resolution.y()-1)};
114+
const Eigen::Vector2i texel = pos_float.cast<int>();
117115

118-
auto weight = pos_float - texel.cast<float>();
116+
const Eigen::Vector2f weight = pos_float - texel.cast<float>();
119117

120118
auto deposit_val = [&](const Eigen::Matrix<float, N_DIMS, 1>& value, T weight, Eigen::Vector2i pos) {
121119
pos.x() = std::max(std::min(pos.x(), resolution.x()-1), 0);
@@ -210,7 +208,7 @@ inline __host__ __device__ Ray pixel_to_ray_pinhole(
210208
float focus_z = 1.0f,
211209
float dof = 0.0f
212210
) {
213-
auto uv = pixel.cast<float>().cwiseQuotient(resolution.cast<float>());
211+
const Eigen::Vector2f uv = pixel.cast<float>().cwiseQuotient(resolution.cast<float>());
214212

215213
Eigen::Vector3f dir = {
216214
(uv.x() - screen_center.x()) * (float)resolution.x() / focal_length.x(),
@@ -258,7 +256,7 @@ inline __host__ __device__ Ray pixel_to_ray(
258256
const Eigen::Vector2i distortion_resolution = Eigen::Vector2i::Zero()
259257
) {
260258
Eigen::Vector2f offset = ld_random_pixel_offset(snap_to_pixel_centers ? 0 : spp, pixel.x(), pixel.y());
261-
auto uv = (pixel.cast<float>() + offset).cwiseQuotient(resolution.cast<float>());
259+
Eigen::Vector2f uv = (pixel.cast<float>() + offset).cwiseQuotient(resolution.cast<float>());
262260

263261
Eigen::Vector3f dir;
264262
if (camera_distortion.mode == ECameraDistortionMode::FTheta) {

src/testbed.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ void Testbed::imgui() {
478478
if (ImGui::SliderFloat("Crop size", &render_diam, 0.1f, max_diam, "%.3f", ImGuiSliderFlags_Logarithmic | ImGuiSliderFlags_NoRoundToFormat)) {
479479
accum_reset = true;
480480
if (old_render_diam > 0.f && render_diam > 0.f) {
481-
auto center = (m_render_aabb.max + m_render_aabb.min) * 0.5f;
481+
const Vector3f center = (m_render_aabb.max + m_render_aabb.min) * 0.5f;
482482
float scale = render_diam / old_render_diam;
483483
m_render_aabb.max = ((m_render_aabb.max-center) * scale + center).cwiseMin(m_aabb.max);
484484
m_render_aabb.min = ((m_render_aabb.min-center) * scale + center).cwiseMax(m_aabb.min);
@@ -859,7 +859,7 @@ void Testbed::draw_visualizations(const Matrix<float, 3, 4>& camera_matrix) {
859859

860860
float zscale = 1.0f / focal[m_fov_axis];
861861
float xyscale = (float)m_window_res[m_fov_axis];
862-
auto screen_center = render_screen_center();
862+
Vector2f screen_center = render_screen_center();
863863
view2proj <<
864864
xyscale, 0, (float)m_window_res.x()*screen_center.x()*zscale, 0,
865865
0, xyscale, (float)m_window_res.y()*screen_center.y()*zscale, 0,

src/testbed_image.cu

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ __global__ void shade_kernel_image(Vector2i resolution, const Vector2f* __restri
152152

153153
uint32_t idx = x + resolution.x() * y;
154154

155-
auto uv = positions[idx];
155+
const Vector2f uv = positions[idx];
156156
if (uv.x() < 0.0f || uv.x() > 1.0f || uv.y() < 0.0f || uv.y() > 1.0f) {
157157
frame_buffer[idx] = Array4f::Zero();
158158
return;
@@ -186,7 +186,7 @@ __global__ void eval_image_kernel_and_snap(uint32_t n_elements, const T* __restr
186186

187187
auto read_val = [&](int x, int y) {
188188
auto val = ((tcnn::vector_t<T, 4>*)texture)[y * resolution.x() + x];
189-
auto result = Array4f(val[0], val[1], val[2], val[3]);
189+
Array4f result{val[0], val[1], val[2], val[3]};
190190
if (!linear_colors) {
191191
result.head<3>() = linear_to_srgb(result.head<3>());
192192
}
@@ -202,10 +202,10 @@ __global__ void eval_image_kernel_and_snap(uint32_t n_elements, const T* __restr
202202
} else {
203203
pos = (pos.cwiseProduct(resolution.cast<float>()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(resolution.cast<float>() - Vector2f::Constant(1.0f + 1e-4f));
204204

205-
Vector2i pos_int = pos.cast<int>();
206-
auto weight = pos - pos_int.cast<float>();
205+
const Vector2i pos_int = pos.cast<int>();
206+
const Vector2f weight = pos - pos_int.cast<float>();
207207

208-
Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0);
208+
const Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0);
209209

210210
val =
211211
(1 - weight.x()) * (1 - weight.y()) * read_val(idx.x(), idx.y()) +

src/testbed_nerf.cu

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,49 +1142,43 @@ __device__ LossAndGradient loss_and_gradient(const Vector3f& target, const Vecto
11421142
inline __device__ Array3f composit_and_lerp(Vector2f pos, const Vector2i& resolution, uint32_t img, const __half* training_images, const Array3f& background_color, const Array3f& exposure_scale = Array3f::Ones()) {
11431143
pos = (pos.cwiseProduct(resolution.cast<float>()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(resolution.cast<float>() - Vector2f::Constant(1.0f + 1e-4f));
11441144

1145-
Vector2i pos_int = pos.cast<int>();
1146-
auto weight = pos - pos_int.cast<float>();
1145+
const Vector2i pos_int = pos.cast<int>();
1146+
const Vector2f weight = pos - pos_int.cast<float>();
11471147

1148-
Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0);
1148+
const Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0);
11491149

11501150
auto read_val = [&](const Vector2i& p) {
11511151
__half val[4];
11521152
*(uint64_t*)&val[0] = ((uint64_t*)training_images)[pixel_idx(p, resolution, img)];
11531153
return Array3f{val[0], val[1], val[2]} * exposure_scale + background_color * (1.0f - (float)val[3]);
11541154
};
11551155

1156-
Array3f result = (
1156+
return (
11571157
(1 - weight.x()) * (1 - weight.y()) * read_val({idx.x(), idx.y()}) +
11581158
(weight.x()) * (1 - weight.y()) * read_val({idx.x()+1, idx.y()}) +
11591159
(1 - weight.x()) * (weight.y()) * read_val({idx.x(), idx.y()+1}) +
11601160
(weight.x()) * (weight.y()) * read_val({idx.x()+1, idx.y()+1})
11611161
);
1162-
1163-
return result;
11641162
}
11651163

11661164
inline __device__ Array3f composit(Vector2f pos, const Vector2i& resolution, uint32_t img, const __half* training_images, const Array3f& background_color, const Array3f& exposure_scale = Array3f::Ones()) {
1167-
Vector2i idx = image_pos(pos, resolution);
1168-
11691165
auto read_val = [&](const Vector2i& p) {
11701166
__half val[4];
11711167
*(uint64_t*)&val[0] = ((uint64_t*)training_images)[pixel_idx(p, resolution, img)];
11721168
return Array3f{val[0], val[1], val[2]} * exposure_scale + background_color * (1.0f - (float)val[3]);
11731169
};
11741170

1175-
return read_val(idx);
1171+
return read_val(image_pos(pos, resolution));
11761172
}
11771173

11781174
inline __device__ Array4f read_rgba(Vector2f pos, const Vector2i& resolution, uint32_t img, const __half* training_images) {
1179-
Vector2i idx = image_pos(pos, resolution);
1180-
11811175
auto read_val = [&](const Vector2i& p) {
11821176
__half val[4];
11831177
*(uint64_t*)&val[0] = ((uint64_t*)training_images)[pixel_idx(p, resolution, img)];
11841178
return Array4f{val[0], val[1], val[2], val[3]};
11851179
};
11861180

1187-
return read_val(idx);
1181+
return read_val(image_pos(pos, resolution));
11881182
}
11891183

11901184
__global__ void compute_loss_kernel_train_nerf(
@@ -1363,9 +1357,9 @@ __global__ void compute_loss_kernel_train_nerf(
13631357
}
13641358

13651359
if (error_map) {
1366-
Vector2f pos = (xy.cwiseProduct(error_map_res.cast<float>()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(error_map_res.cast<float>() - Vector2f::Constant(1.0f + 1e-4f));
1367-
Vector2i pos_int = pos.cast<int>();
1368-
auto weight = pos - pos_int.cast<float>();
1360+
const Vector2f pos = (xy.cwiseProduct(error_map_res.cast<float>()) - Vector2f::Constant(0.5f)).cwiseMax(0.0f).cwiseMin(error_map_res.cast<float>() - Vector2f::Constant(1.0f + 1e-4f));
1361+
const Vector2i pos_int = pos.cast<int>();
1362+
const Vector2f weight = pos - pos_int.cast<float>();
13691363

13701364
Vector2i idx = pos_int.cwiseMin(resolution - Vector2i::Constant(2)).cwiseMax(0);
13711365

src/testbed_volume.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ void Testbed::render_volume(CudaRenderBuffer& render_buffer,
419419
m_volume.hit_counter.enlarge(2);
420420
m_volume.hit_counter.memset(0);
421421

422-
auto sky_col = m_background_color.head<3>();
422+
Array3f sky_col = m_background_color.head<3>();
423423

424424
const dim3 threads = { 16, 8, 1 };
425425
const dim3 blocks = { div_round_up((uint32_t)res.x(), threads.x), div_round_up((uint32_t)res.y(), threads.y), 1 };

0 commit comments

Comments
 (0)