Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions encodings/sparse/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,7 @@ vortex-array = { workspace = true, features = ["_test-harness"] }
[[bench]]
name = "sparse_canonical"
harness = false

[[bench]]
name = "sparse_pushdown"
harness = false
144 changes: 144 additions & 0 deletions encodings/sparse/benches/sparse_pushdown.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Benchmarks for the Sparse pushdown kernels (`is_constant`, `sum`, `min_max`,
//! `null_count`, compare).
//!
//! Each benchmark exercises the registered kernel path on a single representative
//! sparse `i32` array. All are `O(num_patches)`; the patch counts below are sized so
//! each lands in the ~10-100µs range for a stable CodSpeed signal. `between`/`fill_null`/
//! `nan_count` are omitted since they mirror the compare/null_count cost profiles.

#![expect(clippy::cast_possible_truncation)]

use std::sync::LazyLock;

use divan::Bencher;
use vortex_array::ArrayRef;
use vortex_array::Canonical;
use vortex_array::ExecutionCtx;
use vortex_array::IntoArray;
use vortex_array::VortexSessionExecute;
use vortex_array::aggregate_fn::fns::is_constant::is_constant;
use vortex_array::aggregate_fn::fns::min_max::min_max;
use vortex_array::aggregate_fn::fns::null_count::null_count;
use vortex_array::aggregate_fn::fns::sum::sum;
use vortex_array::arrays::ConstantArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::builtins::ArrayBuiltins;
use vortex_array::dtype::DType;
use vortex_array::dtype::Nullability;
use vortex_array::dtype::PType;
use vortex_array::scalar::Scalar;
use vortex_array::scalar_fn::fns::operators::Operator;
use vortex_array::session::ArraySession;
use vortex_buffer::Buffer;
use vortex_error::VortexExpect;
use vortex_session::VortexSession;
use vortex_sparse::Sparse;

/// Benchmark binary entry point; delegates to `divan::main`.
fn main() {
divan::main();
}

const LEN: usize = 1_000_000;

/// Lazily-built session shared by every benchmark in this file.
///
/// Starts from an empty session with `ArraySession` attached and hands it to
/// `vortex_sparse::initialize`, so Sparse and its pushdown kernels are registered.
static SESSION: LazyLock<VortexSession> = LazyLock::new(|| {
    let sess = VortexSession::empty().with::<ArraySession>();
    vortex_sparse::initialize(&sess);
    sess
});

/// Build a sparse `i32` array of `LEN` with `num_patches` uniformly-spaced patches and
/// fill value 1.
///
/// Patch `i` is placed at index `i * (LEN / num_patches)`. When `constant` is true every
/// patch also equals 1, so the whole array is constant (the worst case for `is_constant`:
/// it must scan all patches to confirm). Otherwise patch `i` holds `2 + i`, which never
/// equals the fill.
///
/// # Panics
///
/// Panics if `num_patches` is zero or greater than `LEN`, or if `Sparse::try_new` rejects
/// the generated inputs.
fn make_sparse(num_patches: usize, constant: bool) -> ArrayRef {
    // Guard the stride computation: zero patches would divide by zero, and more patches
    // than rows would round the stride down to 0 and pile every patch onto index 0.
    assert!(
        (1..=LEN).contains(&num_patches),
        "num_patches must be in 1..={LEN}, got {num_patches}"
    );
    let stride = LEN / num_patches;

    let indices: Buffer<u32> = (0..num_patches).map(|i| (i * stride) as u32).collect();
    let values: Buffer<i32> = (0..num_patches)
        .map(|i| if constant { 1 } else { 2 + i as i32 })
        .collect();

    Sparse::try_new(
        indices.into_array(),
        values.into_array(),
        LEN,
        Scalar::from(1i32),
    )
    .vortex_expect("valid sparse")
    .into_array()
}

/// Build a sparse `i32` array of `LEN` with a null fill and `num_patches` nullable patches
/// (every third patch null), so `null_count` does real `O(P)` work over the patch validity.
///
/// Patch `i` is placed at index `i * (LEN / num_patches)` and holds `i`, except that
/// patches whose position `i` is a multiple of three are null.
///
/// # Panics
///
/// Panics if `num_patches` is zero or greater than `LEN`, or if `Sparse::try_new` rejects
/// the generated inputs.
fn make_sparse_nullable(num_patches: usize) -> ArrayRef {
    // Guard the stride computation: zero patches would divide by zero, and more patches
    // than rows would round the stride down to 0 and pile every patch onto index 0.
    assert!(
        (1..=LEN).contains(&num_patches),
        "num_patches must be in 1..={LEN}, got {num_patches}"
    );
    let stride = LEN / num_patches;

    let indices: Buffer<u32> = (0..num_patches).map(|i| (i * stride) as u32).collect();
    let values = PrimitiveArray::from_option_iter(
        (0..num_patches).map(|i| (i % 3 != 0).then_some(i as i32)),
    )
    .into_array();

    let nullable = DType::Primitive(PType::I32, Nullability::Nullable);
    Sparse::try_new(indices.into_array(), values, LEN, Scalar::null(nullable))
        .vortex_expect("valid sparse")
        .into_array()
}

/// Benchmarks `is_constant` on a sparse array whose 100,000 patches all equal the fill.
///
/// The array and execution context are produced by the `with_inputs` closure; the
/// `bench_values` closure only runs the `is_constant` call.
#[divan::bench]
fn sparse_is_constant(bencher: Bencher) {
    let setup = || (make_sparse(100_000, true), SESSION.create_execution_ctx());
    bencher
        .with_inputs(setup)
        .bench_values(|(input, mut exec_ctx)| {
            let outcome = is_constant(&input, &mut exec_ctx).vortex_expect("is_constant");
            divan::black_box(outcome)
        });
}

/// Benchmarks `sum` on a sparse array with 100,000 non-constant patches.
///
/// The array and execution context are produced by the `with_inputs` closure; the
/// `bench_values` closure only runs the `sum` call.
#[divan::bench]
fn sparse_sum(bencher: Bencher) {
    let setup = || (make_sparse(100_000, false), SESSION.create_execution_ctx());
    bencher
        .with_inputs(setup)
        .bench_values(|(input, mut exec_ctx)| {
            let total = sum(&input, &mut exec_ctx).vortex_expect("sum");
            divan::black_box(total)
        });
}

/// Benchmarks `min_max` on a sparse array with 40,000 non-constant patches.
///
/// The array and execution context are produced by the `with_inputs` closure; the
/// `bench_values` closure only runs the `min_max` call.
#[divan::bench]
fn sparse_min_max(bencher: Bencher) {
    let setup = || (make_sparse(40_000, false), SESSION.create_execution_ctx());
    bencher
        .with_inputs(setup)
        .bench_values(|(input, mut exec_ctx)| {
            let extremes = min_max(&input, &mut exec_ctx).vortex_expect("min_max");
            divan::black_box(extremes)
        });
}

/// Benchmarks `null_count` on a null-filled sparse array with 130,000 nullable patches.
///
/// The array and execution context are produced by the `with_inputs` closure; the
/// `bench_values` closure only runs the `null_count` call.
#[divan::bench]
fn sparse_null_count(bencher: Bencher) {
    let setup = || (make_sparse_nullable(130_000), SESSION.create_execution_ctx());
    bencher
        .with_inputs(setup)
        .bench_values(|(input, mut exec_ctx)| {
            let nulls = null_count(&input, &mut exec_ctx).vortex_expect("null_count");
            divan::black_box(nulls)
        });
}

/// Benchmarks an `Eq` comparison of a sparse array (10,000 non-constant patches) against a
/// constant equal to the fill value, then materializes the result to canonical form.
///
/// The sparse array and execution context come from the `with_inputs` closure; the
/// constant right-hand side is built inside the `bench_values` closure.
#[divan::bench]
fn sparse_compare(bencher: Bencher) {
    let setup = || (make_sparse(10_000, false), SESSION.create_execution_ctx());
    bencher
        .with_inputs(setup)
        .bench_values(|(input, mut exec_ctx)| {
            let fill_constant =
                ConstantArray::new(Scalar::from(1i32), input.len()).into_array();
            let compared = input
                .binary(fill_constant, Operator::Eq)
                .vortex_expect("binary");
            let canonical = materialize(compared, &mut exec_ctx);
            divan::black_box(canonical)
        });
}

/// Executes `array` to `Canonical` using `ctx` and returns the result as an `ArrayRef`.
///
/// Panics (via `vortex_expect`) if execution fails.
fn materialize(array: ArrayRef, ctx: &mut ExecutionCtx) -> ArrayRef {
    let canonical = array.execute::<Canonical>(ctx).vortex_expect("execute");
    canonical.into_array()
}
14 changes: 14 additions & 0 deletions encodings/sparse/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,22 @@ impl vortex_array::arrays::slice::SliceKernel for vortex_sparse::Sparse

pub fn vortex_sparse::Sparse::slice(vortex_array::array::view::ArrayView<'_, Self>, core::ops::range::Range<usize>, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::erased::ArrayRef>>

impl vortex_array::scalar_fn::fns::between::kernel::BetweenKernel for vortex_sparse::Sparse

pub fn vortex_sparse::Sparse::between(vortex_array::array::view::ArrayView<'_, Self>, &vortex_array::array::erased::ArrayRef, &vortex_array::array::erased::ArrayRef, &vortex_array::scalar_fn::fns::between::BetweenOptions, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::erased::ArrayRef>>

impl vortex_array::scalar_fn::fns::binary::compare::CompareKernel for vortex_sparse::Sparse

pub fn vortex_sparse::Sparse::compare(vortex_array::array::view::ArrayView<'_, Self>, &vortex_array::array::erased::ArrayRef, vortex_array::scalar_fn::fns::operators::CompareOperator, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::erased::ArrayRef>>

impl vortex_array::scalar_fn::fns::cast::kernel::CastReduce for vortex_sparse::Sparse

pub fn vortex_sparse::Sparse::cast(vortex_array::array::view::ArrayView<'_, Self>, &vortex_array::dtype::DType) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::erased::ArrayRef>>

impl vortex_array::scalar_fn::fns::fill_null::kernel::FillNullKernel for vortex_sparse::Sparse

pub fn vortex_sparse::Sparse::fill_null(vortex_array::array::view::ArrayView<'_, Self>, &vortex_array::scalar::Scalar, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::erased::ArrayRef>>

impl vortex_array::scalar_fn::fns::not::kernel::NotReduce for vortex_sparse::Sparse

pub fn vortex_sparse::Sparse::invert(vortex_array::array::view::ArrayView<'_, Self>) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::erased::ArrayRef>>
Expand Down Expand Up @@ -222,4 +234,6 @@ pub fn vortex_array::array::view::ArrayView<'_, vortex_sparse::Sparse>::patches(

pub fn vortex_array::array::view::ArrayView<'_, vortex_sparse::Sparse>::resolved_patches(&self) -> vortex_error::VortexResult<vortex_array::patches::Patches>

pub fn vortex_sparse::initialize(&vortex_session::VortexSession)

pub type vortex_sparse::SparseArray = vortex_array::array::typed::Array<vortex_sparse::Sparse>
137 changes: 137 additions & 0 deletions encodings/sparse/src/compute/between.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_array::ArrayRef;
use vortex_array::ArrayView;
use vortex_array::ExecutionCtx;
use vortex_array::IntoArray;
use vortex_array::arrays::ConstantArray;
use vortex_array::builtins::ArrayBuiltins;
use vortex_array::scalar_fn::fns::between::BetweenKernel;
use vortex_array::scalar_fn::fns::between::BetweenOptions;
use vortex_error::VortexResult;

use crate::Sparse;
use crate::SparseExt as _;

/// Between kernel specialised for the Sparse encoding.
///
/// With constant bounds, `lower <= x <= upper` (strictness per bound taken from
/// `options`) over a Sparse column is sparse again: each unpatched position evaluates to
/// `between(fill, lo, hi)` and each patched position to `between(patch, lo, hi)`. The
/// kernel therefore evaluates the predicate once for the fill scalar and once over the
/// patch values, then reassembles a Sparse array from the mapped patches and the new fill
/// so downstream consumers keep the sparsity.
///
/// Returns `Ok(None)` (declining, so the caller falls back to canonical) unless both
/// bounds are constants.
impl BetweenKernel for Sparse {
    fn between(
        array: ArrayView<'_, Self>,
        lower: &ArrayRef,
        upper: &ArrayRef,
        options: &BetweenOptions,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Option<ArrayRef>> {
        // Only constant bounds can be pushed into the patches.
        let (lo, hi) = match (lower.as_constant(), upper.as_constant()) {
            (Some(lo), Some(hi)) => (lo, hi),
            _ => return Ok(None),
        };

        let patches = array.patches();

        // Evaluate the predicate on a length-1 array holding the fill value to obtain
        // the fill of the result.
        let fill_probe = ConstantArray::new(array.fill_scalar().clone(), 1).into_array();
        let fill_bool = fill_probe
            .between(
                ConstantArray::new(lo.clone(), 1).into_array(),
                ConstantArray::new(hi.clone(), 1).into_array(),
                options.clone(),
            )?
            .execute_scalar(0, ctx)?;

        // Evaluate the predicate over the patch values, broadcasting the bounds to the
        // number of patches.
        let new_patches = patches.map_values(|patch_values| {
            let patch_count = patch_values.len();
            let lo_bound = ConstantArray::new(lo.clone(), patch_count).into_array();
            let hi_bound = ConstantArray::new(hi.clone(), patch_count).into_array();
            patch_values.between(lo_bound, hi_bound, options.clone())
        })?;

        let rebuilt = Sparse::try_new_from_patches(new_patches, fill_bool)?;
        Ok(Some(rebuilt.into_array()))
    }
}

#[cfg(test)]
mod tests {
    use std::sync::LazyLock;

    use rstest::rstest;
    use vortex_array::Canonical;
    use vortex_array::IntoArray;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::ConstantArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::builtins::ArrayBuiltins;
    use vortex_array::scalar::Scalar;
    use vortex_array::scalar_fn::fns::between::BetweenOptions;
    use vortex_array::scalar_fn::fns::between::StrictComparison;
    use vortex_array::session::ArraySession;
    use vortex_buffer::buffer;
    use vortex_session::VortexSession;

    use crate::Sparse;
    use crate::initialize;

    /// Test session: an empty session with `ArraySession` attached, passed through this
    /// crate's `initialize`.
    static SESSION: LazyLock<VortexSession> = LazyLock::new(|| {
        let sess = VortexSession::empty().with::<ArraySession>();
        initialize(&sess);
        sess
    });

    /// `between` applied directly to a Sparse array must agree with `between` applied to
    /// the same array after it has been canonicalized.
    #[rstest]
    #[case(0i32, 100i32, StrictComparison::NonStrict, StrictComparison::NonStrict)]
    #[case(5i32, 25i32, StrictComparison::Strict, StrictComparison::Strict)]
    #[case(1i32, 20i32, StrictComparison::NonStrict, StrictComparison::Strict)]
    fn between_matches_canonical(
        #[case] lo: i32,
        #[case] hi: i32,
        #[case] lower_strict: StrictComparison,
        #[case] upper_strict: StrictComparison,
    ) {
        // Eight rows with fill 1 and patches 10/20/30 at positions 1/3/5.
        let patch_indices = buffer![1u64, 3, 5].into_array();
        let patch_values = buffer![10i32, 20, 30].into_array();
        let sparse = Sparse::try_new(patch_indices, patch_values, 8, Scalar::from(1i32))
            .unwrap()
            .into_array();

        let len = sparse.len();
        let options = BetweenOptions {
            lower_strict,
            upper_strict,
        };
        let lower = ConstantArray::new(Scalar::from(lo), len).into_array();
        let upper = ConstantArray::new(Scalar::from(hi), len).into_array();

        let mut ctx = SESSION.create_execution_ctx();

        // Kernel path: `between` is applied to the Sparse-encoded array.
        let pushed_down = sparse
            .clone()
            .between(lower.clone(), upper.clone(), options.clone())
            .unwrap()
            .execute::<Canonical>(&mut ctx)
            .unwrap();

        // Baseline: canonicalize the input first, then apply `between` to the result.
        let canonical_input = sparse.execute::<Canonical>(&mut ctx).unwrap().into_array();
        let expected = canonical_input
            .between(lower, upper, options)
            .unwrap()
            .execute::<Canonical>(&mut ctx)
            .unwrap();

        assert_arrays_eq!(pushed_down, expected);
    }
}
Loading
Loading