Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions repro_uint8_bug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Minimal reproduction of the uint8 type closure conversion bug."""

import warp as wp

wp.init()


def create_type_closure_scalar(scalar_type):
    """Build a kernel that converts via the captured ``scalar_type`` closure.

    The kernel casts its float ``input`` to ``scalar_type``, widens the result
    back to float, and asserts equality against ``expected`` on device.
    """

    @wp.kernel
    def kernel(input: float, expected: float):
        converted = scalar_type(input)
        wp.expect_eq(float(converted), expected)

    return kernel


def _run(kernel, inputs):
    # Launch on CPU and block until completion so failures surface in order.
    wp.launch(kernel, dim=1, inputs=inputs, device="cpu")
    wp.synchronize()


# These work fine (int, float closures)
type_closure_kernel_int = create_type_closure_scalar(int)
type_closure_kernel_float = create_type_closure_scalar(float)

# This is the broken one
type_closure_kernel_uint8 = create_type_closure_scalar(wp.uint8)

print("Testing int closure...")
_run(type_closure_kernel_int, [-1.5, -1.0])
print("  PASSED")

print("Testing float closure...")
_run(type_closure_kernel_float, [-1.5, -1.5])
print("  PASSED")

print("Testing uint8 closure...")
try:
    # float -1.5 truncates to -1, which wraps to 255 as uint8
    _run(type_closure_kernel_uint8, [-1.5, 255.0])
    print("  PASSED")
except Exception as e:
    print(f"  FAILED with exception: {type(e).__name__}: {e}")
12 changes: 11 additions & 1 deletion warp/_src/builtins.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,8 +1007,17 @@ def get_diag_value_func(arg_types: Mapping[str, type], arg_values: Mapping[str,

# scalar type constructors between all storage / compute types
scalar_types_all = [*scalar_types, bool, int, float]

unsigned_int_types = (uint8, uint16, uint32, uint64)
float_src_types = {float16: "float16", float32: "float32", float64: "float64", float: "float32"}

for t in scalar_types_all:
for u in scalar_types_all:
# Use safe cast for float -> unsigned to avoid C++ UB
safe_native = None
if t in unsigned_int_types and u in float_src_types:
safe_native = f"{float_src_types[u]}_to_{t.__name__}"

add_builtin(
t.__name__,
input_types={"a": u},
Expand All @@ -1017,7 +1026,8 @@ def get_diag_value_func(arg_types: Mapping[str, type], arg_values: Mapping[str,
hidden=True,
group="Scalar Math",
export=False,
namespace="wp::" if t is not bool else "",
namespace="wp::" if t is not bool and not safe_native else "",
native_func=safe_native if safe_native else t.__name__,
)


Expand Down
59 changes: 59 additions & 0 deletions warp/native/builtin.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,46 @@ typedef uint64_t uint64;
typedef const char* str;


// Float-to-unsigned conversions: cast through int64 to avoid C++ UB
// ([conv.fpint]: a float-to-integer conversion is UB when the truncated
// value cannot be represented in the destination type).
//
// Clamp-convert a floating-point value to int64. NaN maps to 0; values
// outside the representable range saturate to INT64_MIN / INT64_MAX.
template <typename F> CUDA_CALLABLE inline int64 safe_float_to_int64(F x)
{
    // NaN is the only value that does not compare equal to itself.
    if (!(x == x))
        return 0;
    // Both bounds are powers of two, so they are exact in float32 and float64.
    constexpr F lower_bound = static_cast<F>(-9223372036854775808.0); // -2^63
    constexpr F overflow_bound = static_cast<F>(9223372036854775808.0); // 2^63, first value past INT64_MAX
    if (x < lower_bound)
        return -9223372036854775807LL - 1LL; // INT64_MIN, written to avoid a literal overflow
    if (x >= overflow_bound)
        return 9223372036854775807LL; // INT64_MAX
    // In [-2^63, 2^63): truncation toward zero is well-defined here.
    return static_cast<int64>(x);
}

// Clamp-convert a floating-point value to uint64 without UB. NaN maps to 0,
// negative values wrap modulo 2^64 (matching integer-to-unsigned semantics),
// and values >= 2^64 saturate to UINT64_MAX.
template <typename F> CUDA_CALLABLE inline uint64 safe_float_to_uint64(F x)
{
    // NaN check (NaN != NaN).
    if (!(x == x))
        return 0;
    if (x <= 0.0)
        // Negative: truncate via int64, then let the unsigned conversion wrap mod 2^64.
        return static_cast<uint64>(safe_float_to_int64(x));
    constexpr F two_pow_63 = static_cast<F>(9223372036854775808.0); // 2^63
    constexpr F two_pow_64 = static_cast<F>(18446744073709551616.0); // 2^64, first value past UINT64_MAX
    if (x >= two_pow_64)
        return 18446744073709551615ULL; // UINT64_MAX
    if (x >= two_pow_63)
        // x in [2^63, 2^64): x - 2^63 is exact (Sterbenz lemma), lands in int64
        // range, and the 2^63 offset is restored in unsigned arithmetic.
        return static_cast<uint64>(safe_float_to_int64(x - two_pow_63)) + 9223372036854775808ULL;
    // x in (0, 2^63): fits directly through the int64 path.
    return static_cast<uint64>(safe_float_to_int64(x));
}

// Narrow unsigned destinations convert through the clamped int64 path, then
// truncate modulo 2^N exactly like an integer static_cast would — so negative
// inputs wrap (e.g. -1.0f -> 255 as uint8) instead of invoking UB.
// NOTE(review): out-of-range positives wrap for uint8/16/32 but saturate for
// uint64 (which goes through safe_float_to_uint64) — confirm this asymmetry is intended.
CUDA_CALLABLE inline uint8 float32_to_uint8(float32 x) { return static_cast<uint8>(safe_float_to_int64(x)); }
CUDA_CALLABLE inline uint8 float64_to_uint8(float64 x) { return static_cast<uint8>(safe_float_to_int64(x)); }
CUDA_CALLABLE inline uint16 float32_to_uint16(float32 x) { return static_cast<uint16>(safe_float_to_int64(x)); }
CUDA_CALLABLE inline uint16 float64_to_uint16(float64 x) { return static_cast<uint16>(safe_float_to_int64(x)); }
CUDA_CALLABLE inline uint32 float32_to_uint32(float32 x) { return static_cast<uint32>(safe_float_to_int64(x)); }
CUDA_CALLABLE inline uint32 float64_to_uint32(float64 x) { return static_cast<uint32>(safe_float_to_int64(x)); }
CUDA_CALLABLE inline uint64 float32_to_uint64(float32 x) { return safe_float_to_uint64(x); }
CUDA_CALLABLE inline uint64 float64_to_uint64(float64 x) { return safe_float_to_uint64(x); }


struct half;

CUDA_CALLABLE half float_to_half(float x);
Expand Down Expand Up @@ -182,6 +222,12 @@ static_assert(sizeof(half) == 2, "Size of half / float16 type must be 2-bytes");

typedef half float16;

// float16 sources: widen losslessly to float32 first (defined only after the
// half type exists), then reuse the float32 safe casts above.
CUDA_CALLABLE inline uint8 float16_to_uint8(float16 x) { return float32_to_uint8(float32(x)); }
CUDA_CALLABLE inline uint16 float16_to_uint16(float16 x) { return float32_to_uint16(float32(x)); }
CUDA_CALLABLE inline uint32 float16_to_uint32(float16 x) { return float32_to_uint32(float32(x)); }
CUDA_CALLABLE inline uint64 float16_to_uint64(float16 x) { return float32_to_uint64(float32(x)); }

// Approximate division/reciprocal intrinsics
#if defined(__CUDA_ARCH__)

Expand Down Expand Up @@ -337,6 +383,19 @@ template <typename T> CUDA_CALLABLE inline void adj_float16(T x, T& adj_x, float
template <typename T> CUDA_CALLABLE inline void adj_float32(T x, T& adj_x, float32 adj_ret) { adj_x += T(adj_ret); }
template <typename T> CUDA_CALLABLE inline void adj_float64(T x, T& adj_x, float64 adj_ret) { adj_x += T(adj_ret); }

// Reverse-mode adjoint stubs for the safe float-to-unsigned casts. Integer
// truncation is piecewise constant (zero derivative almost everywhere), so no
// gradient is propagated back into adj_x — the stubs intentionally do nothing,
// mirroring how the other cast adjoints in this file treat non-differentiable ops.
template <typename T> CUDA_CALLABLE inline void adj_float32_to_uint8(T, T&, uint8) { }
template <typename T> CUDA_CALLABLE inline void adj_float64_to_uint8(T, T&, uint8) { }
template <typename T> CUDA_CALLABLE inline void adj_float16_to_uint8(T, T&, uint8) { }
template <typename T> CUDA_CALLABLE inline void adj_float32_to_uint16(T, T&, uint16) { }
template <typename T> CUDA_CALLABLE inline void adj_float64_to_uint16(T, T&, uint16) { }
template <typename T> CUDA_CALLABLE inline void adj_float16_to_uint16(T, T&, uint16) { }
template <typename T> CUDA_CALLABLE inline void adj_float32_to_uint32(T, T&, uint32) { }
template <typename T> CUDA_CALLABLE inline void adj_float64_to_uint32(T, T&, uint32) { }
template <typename T> CUDA_CALLABLE inline void adj_float16_to_uint32(T, T&, uint32) { }
template <typename T> CUDA_CALLABLE inline void adj_float32_to_uint64(T, T&, uint64) { }
template <typename T> CUDA_CALLABLE inline void adj_float64_to_uint64(T, T&, uint64) { }
template <typename T> CUDA_CALLABLE inline void adj_float16_to_uint64(T, T&, uint64) { }

#define kEps 0.0f

Expand Down
22 changes: 20 additions & 2 deletions warp/tests/test_codegen_instancing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1091,13 +1091,31 @@ def k(input: float, expected: float):
type_closure_kernel_uint8 = create_type_closure_scalar(wp.uint8)


def create_type_closure_scalar_f64(scalar_type):
    """Variant of create_type_closure_scalar using float64 arguments.

    float64 keeps enough precision to express values near the uint64 upper
    boundary, which float32 cannot represent exactly.
    """

    @wp.kernel
    def kernel(input: wp.float64, expected: wp.float64):
        converted = scalar_type(input)
        wp.expect_eq(wp.float64(converted), expected)

    return kernel


type_closure_kernel_uint64_f64 = create_type_closure_scalar_f64(wp.uint64)


def test_type_closure_scalar(test, device):
    """Exercise kernels whose closures capture a scalar type constructor."""
    with wp.ScopedDevice(device):
        wp.launch(type_closure_kernel_int, dim=1, inputs=[-1.5, -1.0])
        wp.launch(type_closure_kernel_float, dim=1, inputs=[-1.5, -1.5])

        # uint8 conversions, including negative / out-of-range inputs that
        # must wrap modulo 256 rather than hit undefined behavior.
        uint8_cases = (
            (-1.5, 255.0),
            (-0.1, 0.0),
            (255.1, 255.0),
            (128.0, 128.0),
            (-100.0, 156.0),
        )
        for value, expected in uint8_cases:
            wp.launch(type_closure_kernel_uint8, dim=1, inputs=[value, expected])

        # Test boundary cases for uint64 truncation safety with float64 precision
        wp.launch(type_closure_kernel_uint64_f64, dim=1, inputs=[9223372036854774784.0, 9223372036854774784.0])


# =======================================================================
Expand Down