Skip to content

Commit 6927825

Browse files
committed
Fix CUDA bitpacked sliced output allocation
Decode sliced bit-packed arrays in padded coordinates: the output buffer and the kernel launch are now sized for `offset + len` values instead of `len`. This keeps the returned `offset..offset + len` device slice in bounds and ensures that the last 1024-value chunk touched by the slice is decoded. Signed-off-by: "Alexander Droste" <alexander.droste@protonmail.com> Signed-off-by: Alexander Droste <alexander.droste@protonmail.com>
1 parent 733ab9e commit 6927825

1 file changed

Lines changed: 13 additions & 5 deletions

File tree

vortex-cuda/src/kernel/encodings/bitpacked.rs

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -177,14 +177,16 @@ where
177177
// Get CUDA view of input
178178
let input_view = device_input.cuda_view::<A::Physical>()?;
179179

180+
let output_len = offset + len;
181+
180182
// Allocate output buffer
181-
let output_slice = ctx.device_alloc::<A>(len.next_multiple_of(1024))?;
183+
let output_slice = ctx.device_alloc::<A>(output_len.next_multiple_of(1024))?;
182184
let output_buf = CudaDeviceBuffer::new(output_slice);
183185
let output_view = output_buf.as_view::<A>();
184186

185187
let output_width = size_of::<A>() * 8;
186188
let cuda_function = bitpacked_cuda_kernel(bit_width, output_width, ctx)?;
187-
let config = bitpacked_cuda_launch_config(output_width, len)?;
189+
let config = bitpacked_cuda_launch_config(output_width, output_len)?;
188190

189191
// We hold this here to keep the device buffers alive.
190192
let device_patches = if let Some(patches) = patches {
@@ -199,7 +201,7 @@ where
199201

200202
let patches_arg = build_gpu_patches(device_patches.as_ref())?;
201203

202-
ctx.launch_kernel_config(&cuda_function, config, len, |args| {
204+
ctx.launch_kernel_config(&cuda_function, config, output_len, |args| {
203205
args.arg(&input_view)
204206
.arg(&output_view)
205207
.arg(&reference)
@@ -552,8 +554,14 @@ mod tests {
552554
Ok(())
553555
}
554556

557+
#[rstest]
558+
#[case(67, 3969)]
559+
#[case(1, 1025)]
555560
#[crate::test]
556-
fn test_cuda_bitunpack_sliced() -> VortexResult<()> {
561+
fn test_cuda_bitunpack_sliced(
562+
#[case] slice_start: usize,
563+
#[case] slice_end: usize,
564+
) -> VortexResult<()> {
557565
let mut ctx = vortex_array::array_session().create_execution_ctx();
558566
let bit_width = 32;
559567
let mut cuda_ctx = CudaSession::create_execution_ctx(&crate::cuda_session())
@@ -570,7 +578,7 @@ mod tests {
570578

571579
let bitpacked_array = BitPacked::encode(&primitive_array.into_array(), bit_width, &mut ctx)
572580
.vortex_expect("operation should succeed in test");
573-
let sliced_array = bitpacked_array.into_array().slice(67..3969)?;
581+
let sliced_array = bitpacked_array.into_array().slice(slice_start..slice_end)?;
574582
let gpu_result = block_on(async {
575583
BitPackedExecutor
576584
.execute(sliced_array.clone(), &mut cuda_ctx)

0 commit comments

Comments
 (0)