Commit dcb5217

Faster dr::Texture construction
The Dr.Jit texture constructor currently spends a significant amount of time zero-initializing and copying arrays when these are backed by ``dr::DynamicArray<T>`` (i.e., in scalar mode). This commit changes the constructor so that it takes a universal (``T&&``) reference and automatically copies or moves as needed.
1 parent 583fde5 commit dcb5217
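
To illustrate the idea, here is a minimal, self-contained sketch of the copy-or-move dispatch using a hypothetical ``Holder`` type (not the actual Dr.Jit API): passing an lvalue copies the backing array, passing an rvalue moves it, so the constructor no longer pays for a redundant initialization plus copy.

#include <cstdio>
#include <utility>
#include <vector>

// Hypothetical stand-in for a tensor backed by dr::DynamicArray<T>.
struct Holder {
    std::vector<float> data;

    // Universal (forwarding) reference: binds to both lvalues and rvalues.
    // std::forward<T> copies when given an lvalue and moves when given an rvalue.
    template <typename T>
    explicit Holder(T &&value) : data(std::forward<T>(value)) { }
};

int main() {
    std::vector<float> buf(1 << 20, 1.f);

    Holder copied(buf);             // lvalue: 'buf' is copied and remains usable
    Holder moved(std::move(buf));   // rvalue: storage is stolen, no element copy

    std::printf("copied: %zu elements, moved: %zu elements\n",
                copied.data.size(), moved.data.size());
    return 0;
}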

5 files changed: +77 −39


include/drjit/dynamic.h

Lines changed: 8 additions & 0 deletions
@@ -407,6 +407,14 @@ struct DynamicArray
         m_free = true;
     }

+    static DynamicArray map_(Value *value, size_t size) {
+        DynamicArray result;
+        result.m_data = value;
+        result.m_size = size;
+        result.m_free = false;
+        return result;
+    }
+
     static auto counter(size_t size) {
         uint32_array_t<DynamicArray> result;
         result.init_(size);
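
The new ``map_`` helper wraps existing memory (or, as used in ``texture.h`` below, a null pointer of a given size) without taking ownership, so no allocation or zero-initialization takes place. A minimal sketch of the same pattern with a hypothetical ``SimpleArray`` type:

#include <cstddef>
#include <cstdlib>

// Hypothetical dynamic array illustrating the map_() pattern: the mapped
// instance reuses caller-provided storage and never frees or initializes it.
struct SimpleArray {
    float *m_data = nullptr;
    size_t m_size = 0;
    bool m_free = false;   // do we own m_data?

    static SimpleArray map_(float *value, size_t size) {
        SimpleArray result;
        result.m_data = value;   // borrow the pointer (may even be nullptr)
        result.m_size = size;
        result.m_free = false;   // never deallocate borrowed memory
        return result;
    }

    ~SimpleArray() {
        if (m_free)
            std::free(m_data);
    }
};

int main() {
    float buffer[4] = { 1.f, 2.f, 3.f, 4.f };

    // Non-owning view of an existing buffer: no allocation, no copy.
    SimpleArray view = SimpleArray::map_(buffer, 4);

    // A placeholder of a known size whose storage is provided later,
    // mirroring Storage::map_(nullptr, size) in the texture constructor.
    SimpleArray placeholder = SimpleArray::map_(nullptr, 16);

    return (view.m_size == 4 && placeholder.m_data == nullptr) ? 0 : 1;
}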

include/drjit/tensor.h

Lines changed: 12 additions & 0 deletions
@@ -161,6 +161,18 @@ struct Tensor
         }
     }

+    Tensor(Array &&data, size_t ndim, const size_t *shape)
+        : m_array(std::move(data)), m_shape(shape, shape + ndim) {
+        size_t size = 1;
+        for (size_t i = 0; i < ndim; ++i)
+            size *= shape[i];
+        if (size != m_array.size()) {
+            drjit_fail("Tensor(): invalid size specified (%zu vs %zu)!",
+                       size, m_array.size());
+        }
+    }
+
+
     Tensor(const void *ptr, size_t ndim, const size_t *shape)
         : m_shape(shape, shape + ndim) {
         size_t size = 1;

include/drjit/texture.h

Lines changed: 54 additions & 36 deletions
@@ -42,34 +42,34 @@ enum class CudaTextureFormat : uint32_t {
     Float16 = 1, /// Half precision storage format
 };

-template <typename _Storage, size_t Dimension> class Texture {
+template <typename Storage_, size_t Dimension> class Texture {
 public:
-    static constexpr bool IsCUDA = is_cuda_v<_Storage>;
-    static constexpr bool IsDiff = is_diff_v<_Storage>;
-    static constexpr bool IsDynamic = is_dynamic_v<_Storage>;
+    static constexpr bool IsCUDA = is_cuda_v<Storage_>;
+    static constexpr bool IsDiff = is_diff_v<Storage_>;
+    static constexpr bool IsDynamic = is_dynamic_v<Storage_>;
     // Only half/single-precision floating-point CUDA textures are supported
-    static constexpr bool IsHalf = std::is_same_v<scalar_t<_Storage>, drjit::half>;
-    static constexpr bool IsSingle = std::is_same_v<scalar_t<_Storage>, float>;
+    static constexpr bool IsHalf = std::is_same_v<scalar_t<Storage_>, drjit::half>;
+    static constexpr bool IsSingle = std::is_same_v<scalar_t<Storage_>, float>;
     static constexpr bool HasCudaTexture = (IsHalf || IsSingle) && IsCUDA;
     static constexpr int CudaFormat = HasCudaTexture ?
         IsHalf ? (int)CudaTextureFormat::Float16 : (int)CudaTextureFormat::Float32 : -1;

-    using Int32 = int32_array_t<_Storage>;
-    using UInt32 = uint32_array_t<_Storage>;
-    using Storage = std::conditional_t<IsDynamic, _Storage, DynamicArray<_Storage>>;
-    using Packet = std::conditional_t<is_jit_v<_Storage>,
-                                      DynamicArray<_Storage>, _Storage*>;
+    using Int32 = int32_array_t<Storage_>;
+    using UInt32 = uint32_array_t<Storage_>;
+    using Storage = std::conditional_t<IsDynamic, Storage_, DynamicArray<Storage_>>;
+    using Packet = std::conditional_t<is_jit_v<Storage_>,
+                                      DynamicArray<Storage_>, Storage_*>;
     using TensorXf = Tensor<Storage>;

 #define DR_TEX_ALLOC_PACKET(name, size) \
     Packet _packet; \
-    _Storage* name; \
+    Storage_* name; \
     \
     if constexpr (is_jit_v<Value>) { \
         _packet = empty<Packet>(m_channels_storage); \
         name = _packet.data(); \
     } else { \
-        name = (_Storage*) alloca(sizeof(_Storage) * size); \
+        name = (Storage_*) alloca(sizeof(Storage_) * size); \
         (void) _packet; \
     }

@@ -125,15 +125,16 @@ template <typename _Storage, size_t Dimension> class Texture {
      * Both the \c filter_mode and \c wrap_mode have the same defaults and
      * behaviors as for the previous constructor.
      */
-    Texture(const TensorXf &tensor, bool use_accel = true, bool migrate = true,
+    template <typename TensorT>
+    Texture(TensorT &&tensor, bool use_accel = true, bool migrate = true,
             FilterMode filter_mode = FilterMode::Linear,
             WrapMode wrap_mode = WrapMode::Clamp) {
         if (tensor.ndim() != Dimension + 1)
             jit_raise("Texture::Texture(): tensor dimension must equal "
                       "texture dimension plus one.");
         init(tensor.shape().data(), tensor.shape(Dimension), use_accel,
              filter_mode, wrap_mode);
-        set_tensor(tensor, migrate);
+        set_tensor(std::forward<TensorT>(tensor), migrate);
     }

     Texture(Texture &&other) noexcept {
@@ -209,16 +210,17 @@ template <typename _Storage, size_t Dimension> class Texture {
      * When \c migrate is set to \c true on CUDA mode, the texture information
      * is *fully* migrated to GPU texture memory to avoid redundant storage.
      */
-    void set_value(const Storage &value, bool migrate=false) {
-        if constexpr (!is_jit_v<_Storage>) {
+    template <typename StorageT>
+    void set_value(StorageT &&value, bool migrate = false) {
+        if constexpr (!is_jit_v<Storage_>) {
             if (value.size() != m_size)
                 jit_raise("Texture::set_value(): unexpected array size!");
-            m_value.array() = value;
+            m_value.array() = std::forward<StorageT>(value);
         } else /* JIT variant */ {
             Storage padded_value;

             if (m_channels_storage != m_channels) {
-                using Mask = mask_t<_Storage>;
+                using Mask = mask_t<Storage_>;
                 UInt32 idx = arange<UInt32>(m_size);
                 UInt32 pixels_idx = idx / m_channels_storage;
                 UInt32 channel_idx = idx % m_channels_storage;
@@ -230,7 +232,9 @@ template <typename _Storage, size_t Dimension> class Texture {
             }

             if (padded_value.size() != m_size)
-                jit_raise("Texture::set_value(): unexpected array size!");
+                jit_raise(
+                    "Texture::set_value(): unexpected array size (%zu vs %zu)!",
+                    padded_value.size(), m_size);

             // We can always re-compute the unpadded values from the padded
             // ones. However, if we systematically do that, users will not be
@@ -242,9 +246,11 @@ template <typename _Storage, size_t Dimension> class Texture {
             // the correct gradient value.
             // To solve this issue, we store the AD index now, and re-attach
             // it to the output of `tensor()` on every call.
-            if constexpr (IsDiff)
-                m_unpadded_value.array() =
-                    replace_grad(m_unpadded_value.array(), value);
+            if constexpr (IsDiff) {
+                if (grad_enabled(value))
+                    m_unpadded_value.array() =
+                        replace_grad(m_unpadded_value.array(), value);
+            }

             if constexpr (HasCudaTexture) {
                 if (m_use_accel) {
@@ -286,12 +292,13 @@ template <typename _Storage, size_t Dimension> class Texture {
      * When \c migrate is set to \c true on CUDA mode, the texture information
      * is *fully* migrated to GPU texture memory to avoid redundant storage.
      */
-    void set_tensor(const TensorXf &tensor, bool migrate=false) {
+    template <typename TensorT>
+    void set_tensor(TensorT &&tensor, bool migrate = false) {
         if (tensor.ndim() != Dimension + 1)
             jit_raise("Texture::set_tensor(): tensor dimension must equal "
-                       "texture dimension plus one (channels).");
+                      "texture dimension plus one (channels).");

-        if (&tensor == &m_unpadded_value) {
+        if ((void *) &tensor == (void *) &m_unpadded_value) {
             jit_log(::LogLevel::Warn,
                     "Texture::set_tensor(): the `tensor` argument is a "
                     "reference to this texture's own tensor representation "
@@ -311,9 +318,12 @@ template <typename _Storage, size_t Dimension> class Texture {

         // Only update tensors & CUDA texture if shape changed
         init(tensor.shape().data(), tensor.shape(Dimension),
-                m_use_accel, m_filter_mode, m_wrap_mode, shape_changed);
+             m_use_accel, m_filter_mode, m_wrap_mode, shape_changed);

-        set_value(tensor.array(), migrate);
+        if constexpr (std::is_lvalue_reference_v<TensorT>)
+            set_value(tensor.array(), migrate);
+        else
+            set_value(std::move(tensor.array()), migrate);
     }

     /**
@@ -342,7 +352,7 @@ template <typename _Storage, size_t Dimension> class Texture {
             }
         }

-        if constexpr (!is_jit_v<_Storage>) {
+        if constexpr (!is_jit_v<Storage_>) {
             if (shape_changed)
                 init(m_unpadded_value.shape().data(),
                      m_unpadded_value.shape(Dimension), m_use_accel, m_filter_mode,
@@ -371,7 +381,7 @@ template <typename _Storage, size_t Dimension> class Texture {
     * \brief Return the texture data as a tensor object
     */
    const TensorXf &tensor() const {
-        if constexpr (!is_jit_v<_Storage>) {
+        if constexpr (!is_jit_v<Storage_>) {
            return m_value;
        } else {
            sync_device_data();
@@ -412,7 +422,7 @@ template <typename _Storage, size_t Dimension> class Texture {
     */
    TensorXf &tensor() {
        return const_cast<TensorXf &>(
-            const_cast<const Texture<_Storage, Dimension> *>(this)->tensor());
+            const_cast<const Texture<Storage_, Dimension> *>(this)->tensor());
    }

    /**
@@ -1386,7 +1396,7 @@ template <typename _Storage, size_t Dimension> class Texture {
        m_channels = channels;

        // Determine padding used for channels depending on backend
-        if constexpr (is_jit_v<_Storage>) {
+        if constexpr (is_jit_v<Storage_>) {
            m_channels_storage = 1;
            while (m_channels_storage < m_channels)
                m_channels_storage <<= 1;
@@ -1413,10 +1423,18 @@ template <typename _Storage, size_t Dimension> class Texture {
        m_wrap_mode = wrap_mode;

        if (init_tensor) {
-            m_value =
-                TensorXf(empty<Storage>(m_size), Dimension + 1, tensor_shape);
-            m_unpadded_value =
-                TensorXf(empty<Storage>(unpadded_size), Dimension + 1, m_shape);
+            if constexpr (is_jit_v<Storage_>) {
+                m_value =
+                    TensorXf(empty<Storage>(m_size), Dimension + 1, tensor_shape);
+                m_unpadded_value =
+                    TensorXf(empty<Storage>(unpadded_size), Dimension + 1, m_shape);
+            } else {
+                // Don't allocate memory in scalar modes
+                m_value =
+                    TensorXf(Storage::map_(nullptr, m_size), Dimension + 1, tensor_shape);
+                m_unpadded_value =
+                    TensorXf(Storage::map_(nullptr, unpadded_size), Dimension + 1, m_shape);
+            }
        }

        if constexpr (HasCudaTexture) {
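
The ``set_tensor`` change above dispatches on the value category of the forwarded argument: an lvalue keeps the old copying behavior, while an rvalue moves the underlying array out of the caller's tensor. A rough sketch of that dispatch with hypothetical types (not the actual Dr.Jit classes):

#include <cstdio>
#include <type_traits>
#include <utility>
#include <vector>

// Hypothetical tensor-like type with an accessor for its backing array.
struct FakeTensor {
    std::vector<float> storage;
    std::vector<float> &array() { return storage; }
};

struct FakeTexture {
    std::vector<float> m_value;

    // Mirrors the set_tensor()/set_value() pattern: forward the tensor, then
    // copy its array for lvalues and move it for rvalues.
    template <typename TensorT>
    void set_tensor(TensorT &&tensor) {
        if constexpr (std::is_lvalue_reference_v<TensorT>)
            m_value = tensor.array();             // copy, caller keeps its data
        else
            m_value = std::move(tensor.array());  // move, no element-wise copy
    }
};

int main() {
    FakeTensor t1 { std::vector<float>(8, 1.f) };
    FakeTensor t2 { std::vector<float>(8, 2.f) };
    FakeTexture tex;

    tex.set_tensor(t1);             // lvalue path: t1 keeps its storage
    tex.set_tensor(std::move(t2));  // rvalue path: t2's storage is stolen

    std::printf("t1: %zu, t2: %zu, tex: %zu\n",
                t1.storage.size(), t2.storage.size(), tex.m_value.size());
    return 0;
}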

src/python/texture.h

Lines changed: 2 additions & 2 deletions
@@ -26,8 +26,8 @@ void bind_texture(nb::module_ &m, const char *name) {
              "filter_mode"_a = dr::FilterMode::Linear,
              "wrap_mode"_a = dr::WrapMode::Clamp,
              doc_Texture_init_tensor)
-        .def("set_value", &Tex::set_value, "value"_a, "migrate"_a = false, doc_Texture_set_value)
-        .def("set_tensor", &Tex::set_tensor, "tensor"_a, "migrate"_a = false, doc_Texture_set_tensor)
+        .def("set_value", &Tex::template set_value<const typename Tex::Storage &>, "value"_a, "migrate"_a = false, doc_Texture_set_value)
+        .def("set_tensor", &Tex::template set_tensor<const typename Tex::TensorXf &>, "tensor"_a, "migrate"_a = false, doc_Texture_set_tensor)
         .def("inplace_update", &Tex::inplace_update, "migrate"_a = false, doc_Texture_inplace_update)
         .def("value", &Tex::value, nb::rv_policy::reference_internal, doc_Texture_value)
         .def("tensor",
