Skip to content

Commit 17eb2ca

Browse files
committed
Constant row-encode kernel
Replace the stub `RowSizeKernel` / `RowEncodeKernel` impls for `ConstantArray` with real implementations that skip canonicalization. The size pass adds the (constant) per-row scalar size to every entry of the shared `sizes` slice. The encode pass encodes the scalar bytes once into a small heap buffer, then `copy_nonoverlapping`s those bytes into each row's slot. Per-row work is one `copy_nonoverlapping(N)` plus one cursor add, where `N` is typically 9 (i64), 5 (i32), or 17 (i128). Add a `constant_i64_*` bench triplet (arrow-row baseline, vortex with kernel, vortex through canonicalization) and a `constant_path_matches_canonical` test that round-trips bytes both ways and asserts they're identical. Signed-off-by: Claude <noreply@anthropic.com>
1 parent c0b24ad commit 17eb2ca

3 files changed

Lines changed: 96 additions & 13 deletions

File tree

vortex-row/benches/row_encode.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@ use rand::RngExt;
3030
use rand::SeedableRng;
3131
use rand::distr::Alphanumeric;
3232
use rand::rngs::StdRng;
33+
use vortex_array::Canonical;
3334
use vortex_array::IntoArray;
3435
use vortex_array::LEGACY_SESSION;
3536
use vortex_array::VortexSessionExecute;
37+
use vortex_array::arrays::ConstantArray;
3638
use vortex_array::arrays::PrimitiveArray;
3739
use vortex_array::arrays::StructArray;
3840
use vortex_array::arrays::VarBinViewArray;
@@ -175,3 +177,40 @@ fn struct_mixed_vortex(bencher: divan::Bencher) {
175177
convert_columns(&[struct_arr.clone()], &[SortField::default()], &mut ctx).unwrap()
176178
})
177179
}
180+
181+
// ---------- constant_i64 ----------
182+
183+
#[divan::bench]
184+
fn constant_i64_arrow_row(bencher: divan::Bencher) {
185+
let arr = Arc::new(Int64Array::from(vec![42i64; N])) as arrow_array::ArrayRef;
186+
let conv = RowConverter::new(vec![ArrowSortField::new(DataType::Int64)]).unwrap();
187+
let total = (N * (1 + 8)) as u64;
188+
bencher
189+
.counter(BytesCount::new(total))
190+
.bench_local(|| conv.convert_columns(&[arr.clone()]).unwrap())
191+
}
192+
193+
#[divan::bench]
194+
fn constant_i64_vortex_with_kernel(bencher: divan::Bencher) {
195+
let arr = ConstantArray::new(42i64, N).into_array();
196+
let total = (N * (1 + 8)) as u64;
197+
bencher.counter(BytesCount::new(total)).bench_local(|| {
198+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
199+
convert_columns(&[arr.clone()], &[SortField::default()], &mut ctx).unwrap()
200+
})
201+
}
202+
203+
#[divan::bench]
204+
fn constant_i64_vortex_without_kernel(bencher: divan::Bencher) {
205+
let arr = ConstantArray::new(42i64, N).into_array();
206+
let total = (N * (1 + 8)) as u64;
207+
bencher.counter(BytesCount::new(total)).bench_local(|| {
208+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
209+
let canonical = arr
210+
.clone()
211+
.execute::<Canonical>(&mut ctx)
212+
.unwrap()
213+
.into_array();
214+
convert_columns(&[canonical], &[SortField::default()], &mut ctx).unwrap()
215+
})
216+
}

vortex-row/src/kernels/constant.rs

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,39 +2,66 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
//! Row-encode kernels for `ConstantArray`.
5-
//!
6-
//! Stubs in this commit return `Ok(None)` so the dispatch loop falls back to
7-
//! canonicalization. The real impls land in a follow-up commit.
5+
6+
#![allow(
7+
clippy::cast_possible_truncation,
8+
reason = "row encoding indexes into u32-sized buffers; lengths are validated to fit in u32"
9+
)]
810

911
use vortex_array::ArrayView;
1012
use vortex_array::ExecutionCtx;
1113
use vortex_array::arrays::Constant;
1214
use vortex_error::VortexResult;
1315

16+
use crate::codec;
1417
use crate::encode::RowEncodeKernel;
1518
use crate::options::SortField;
1619
use crate::size::RowSizeKernel;
1720

1821
impl RowSizeKernel for Constant {
1922
fn row_size_contribution(
20-
_column: ArrayView<'_, Self>,
21-
_field: SortField,
22-
_sizes: &mut [u32],
23+
column: ArrayView<'_, Self>,
24+
field: SortField,
25+
sizes: &mut [u32],
2326
_ctx: &mut ExecutionCtx,
2427
) -> VortexResult<Option<()>> {
25-
Ok(None)
28+
let add = codec::encoded_size_for_scalar(column.scalar(), field)?;
29+
for s in sizes.iter_mut().take(column.len()) {
30+
*s += add;
31+
}
32+
Ok(Some(()))
2633
}
2734
}
2835

2936
impl RowEncodeKernel for Constant {
3037
fn row_encode_into(
31-
_column: ArrayView<'_, Self>,
32-
_field: SortField,
33-
_offsets: &[u32],
34-
_cursors: &mut [u32],
35-
_out: &mut [u8],
38+
column: ArrayView<'_, Self>,
39+
field: SortField,
40+
offsets: &[u32],
41+
cursors: &mut [u32],
42+
out: &mut [u8],
3643
_ctx: &mut ExecutionCtx,
3744
) -> VortexResult<Option<()>> {
38-
Ok(None)
45+
let bytes = codec::encode_scalar(column.scalar(), field)?;
46+
let len = bytes.len();
47+
let len_u32 = len as u32;
48+
let n = column.len();
49+
if len == 0 {
50+
return Ok(Some(()));
51+
}
52+
// SAFETY: bytes is len bytes; offsets[i] + cursors[i] + len <= out.len() by
53+
// construction of the buffer (the size pass already accounted for this column's
54+
// contribution). copy_nonoverlapping elides the bounds check + slice creation
55+
// that copy_from_slice would do per row.
56+
unsafe {
57+
let src = bytes.as_ptr();
58+
let out_ptr = out.as_mut_ptr();
59+
for i in 0..n {
60+
let pos = (offsets[i] + cursors[i]) as usize;
61+
std::ptr::copy_nonoverlapping(src, out_ptr.add(pos), len);
62+
cursors[i] += len_u32;
63+
}
64+
}
65+
Ok(Some(()))
3966
}
4067
}

vortex-row/src/tests.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use vortex_array::IntoArray;
1515
use vortex_array::LEGACY_SESSION;
1616
use vortex_array::VortexSessionExecute;
1717
use vortex_array::arrays::BoolArray;
18+
use vortex_array::arrays::ConstantArray;
1819
use vortex_array::arrays::ListViewArray;
1920
use vortex_array::arrays::PrimitiveArray;
2021
use vortex_array::arrays::VarBinViewArray;
@@ -222,6 +223,22 @@ fn nulls_first_and_last() -> VortexResult<()> {
222223
Ok(())
223224
}
224225

226+
/// A `ConstantArray` column must row-encode to exactly the same bytes as a
/// `PrimitiveArray` that stores the same value repeated for every row.
#[test]
fn constant_path_matches_canonical() -> VortexResult<()> {
    const ROWS: usize = 8;
    let mut ctx = LEGACY_SESSION.create_execution_ctx();

    let constant_input = ConstantArray::new(42i64, ROWS).into_array();
    let primitive_input = PrimitiveArray::from_iter(vec![42i64; ROWS]).into_array();

    let constant_rows = convert_columns(&[constant_input], &[SortField::default()], &mut ctx)?;
    let primitive_rows = convert_columns(&[primitive_input], &[SortField::default()], &mut ctx)?;

    assert_eq!(
        collect_row_bytes(&constant_rows),
        collect_row_bytes(&primitive_rows)
    );
    Ok(())
}
241+
225242
#[test]
226243
fn struct_sort_order() -> VortexResult<()> {
227244
use vortex_array::arrays::StructArray;

0 commit comments

Comments
 (0)