Skip to content
Merged
2 changes: 0 additions & 2 deletions encodings/sequence/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,6 @@ fn encode_primitive_array<P: NativePType + Into<PValue> + CheckedAdd + CheckedSu
mod tests {
use std::sync::LazyLock;

- #[expect(unused_imports)]

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😢

- use itertools::Itertools;
use vortex_array::VortexSessionExecute;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::assert_arrays_eq;
Expand Down
15 changes: 5 additions & 10 deletions vortex-array/benches/dict_compare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,9 @@ fn bench_compare_primitive(bencher: divan::Bencher, (len, uniqueness): (usize, u
)
.unwrap();
let value = primitive_arr.as_slice::<i32>()[0];
- let session = vortex_array::array_session();

bencher
- .with_inputs(|| (&dict, session.create_execution_ctx()))
+ .with_inputs(|| (&dict, SESSION.create_execution_ctx()))
.bench_refs(|(dict, ctx)| {
dict.clone()
.into_array()
Expand All @@ -81,10 +80,9 @@ fn bench_compare_varbin(bencher: divan::Bencher, (len, uniqueness): (usize, usiz
.unwrap();
let bytes = varbin_arr.with_iterator(|i| i.next().unwrap().unwrap().to_vec());
let value = from_utf8(bytes.as_slice()).unwrap();
- let session = vortex_array::array_session();

bencher
- .with_inputs(|| (&dict, session.create_execution_ctx()))
+ .with_inputs(|| (&dict, SESSION.create_execution_ctx()))
.bench_refs(|(dict, ctx)| {
dict.clone()
.into_array()
Expand All @@ -105,10 +103,9 @@ fn bench_compare_varbinview(bencher: divan::Bencher, (len, uniqueness): (usize,
.unwrap();
let bytes = varbinview_arr.with_iterator(|i| i.next().unwrap().unwrap().to_vec());
let value = from_utf8(bytes.as_slice()).unwrap();
- let session = vortex_array::array_session();

bencher
- .with_inputs(|| (&dict, session.create_execution_ctx()))
+ .with_inputs(|| (&dict, SESSION.create_execution_ctx()))
.bench_refs(|(dict, ctx)| {
dict.clone()
.into_array()
Expand Down Expand Up @@ -144,10 +141,9 @@ fn bench_compare_sliced_dict_primitive(
.unwrap();
let dict = dict.into_array().slice(0..codes_len).unwrap();
let value = primitive_arr.as_slice::<i32>()[0];
- let session = vortex_array::array_session();

bencher
- .with_inputs(|| (&dict, session.create_execution_ctx()))
+ .with_inputs(|| (&dict, SESSION.create_execution_ctx()))
.bench_refs(|(dict, ctx)| {
dict.clone()
.apply(&eq(root(), lit(value)))
Expand All @@ -171,10 +167,9 @@ fn bench_compare_sliced_dict_varbinview(
let dict = dict.into_array().slice(0..codes_len).unwrap();
let bytes = varbin_arr.with_iterator(|i| i.next().unwrap().unwrap().to_vec());
let value = from_utf8(bytes.as_slice()).unwrap();
- let session = vortex_array::array_session();

bencher
- .with_inputs(|| (&dict, session.create_execution_ctx()))
+ .with_inputs(|| (&dict, SESSION.create_execution_ctx()))
.bench_refs(|(dict, ctx)| {
dict.clone()
.apply(&eq(root(), lit(value)))
Expand Down
47 changes: 20 additions & 27 deletions vortex-array/benches/dict_compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,29 +45,30 @@ where
T: NativePType,
StandardUniform: Distribution<T>,
{
- let primitive_arr = gen_primitive_for_dict::<T>(len, unique_values);
+ let primitive_arr = gen_primitive_for_dict::<T>(len, unique_values).into_array();

bencher
.with_inputs(|| (&primitive_arr, SESSION.create_execution_ctx()))
- .bench_refs(|(arr, ctx)| dict_encode(&arr.clone().into_array(), ctx));
+ .bench_refs(|(arr, ctx)| dict_encode(arr, ctx));
}

#[divan::bench(args = BENCH_ARGS)]
fn encode_varbin(bencher: Bencher, (len, unique_values): (usize, usize)) {
- let varbin_arr = VarBinArray::from(gen_varbin_words(len, unique_values));
+ let varbin_arr = VarBinArray::from(gen_varbin_words(len, unique_values)).into_array();

bencher
.with_inputs(|| (&varbin_arr, SESSION.create_execution_ctx()))
- .bench_refs(|(arr, ctx)| dict_encode(&arr.clone().into_array(), ctx));
+ .bench_refs(|(arr, ctx)| dict_encode(arr, ctx));
}

#[divan::bench(args = BENCH_ARGS)]
fn encode_varbinview(bencher: Bencher, (len, unique_values): (usize, usize)) {
- let varbinview_arr = VarBinViewArray::from_iter_str(gen_varbin_words(len, unique_values));
+ let varbinview_arr =
+ VarBinViewArray::from_iter_str(gen_varbin_words(len, unique_values)).into_array();

bencher
.with_inputs(|| (&varbinview_arr, SESSION.create_execution_ctx()))
- .bench_refs(|(arr, ctx)| dict_encode(&arr.clone().into_array(), ctx));
+ .bench_refs(|(arr, ctx)| dict_encode(arr, ctx));
}

#[divan::bench(types = [u8, f32, i64], args = BENCH_ARGS)]
Expand All @@ -76,13 +77,10 @@ where
T: NativePType,
StandardUniform: Distribution<T>,
{
- let primitive_arr = gen_primitive_for_dict::<T>(len, unique_values);
- let dict = dict_encode(
- &primitive_arr.into_array(),
- &mut SESSION.create_execution_ctx(),
- )
- .unwrap()
- .into_array();
+ let primitive_arr = gen_primitive_for_dict::<T>(len, unique_values).into_array();
+ let dict = dict_encode(&primitive_arr, &mut SESSION.create_execution_ctx())
+ .unwrap()
+ .into_array();

bencher
.with_inputs(|| (&dict, SESSION.create_execution_ctx()))
Expand All @@ -91,13 +89,10 @@ where

#[divan::bench(args = BENCH_ARGS)]
fn decode_varbin(bencher: Bencher, (len, unique_values): (usize, usize)) {
- let varbin_arr = VarBinArray::from(gen_varbin_words(len, unique_values));
- let dict = dict_encode(
- &varbin_arr.into_array(),
- &mut SESSION.create_execution_ctx(),
- )
- .unwrap()
- .into_array();
+ let varbin_arr = VarBinArray::from(gen_varbin_words(len, unique_values)).into_array();
+ let dict = dict_encode(&varbin_arr, &mut SESSION.create_execution_ctx())
+ .unwrap()
+ .into_array();

bencher
.with_inputs(|| (&dict, SESSION.create_execution_ctx()))
Expand All @@ -106,13 +101,11 @@ fn decode_varbin(bencher: Bencher, (len, unique_values): (usize, usize)) {

#[divan::bench(args = BENCH_ARGS)]
fn decode_varbinview(bencher: Bencher, (len, unique_values): (usize, usize)) {
- let varbinview_arr = VarBinViewArray::from_iter_str(gen_varbin_words(len, unique_values));
- let dict = dict_encode(
- &varbinview_arr.into_array(),
- &mut SESSION.create_execution_ctx(),
- )
- .unwrap()
- .into_array();
+ let varbinview_arr =
+ VarBinViewArray::from_iter_str(gen_varbin_words(len, unique_values)).into_array();
+ let dict = dict_encode(&varbinview_arr, &mut SESSION.create_execution_ctx())
+ .unwrap()
+ .into_array();

bencher
.with_inputs(|| (&dict, SESSION.create_execution_ctx()))
Expand Down
20 changes: 6 additions & 14 deletions vortex-array/src/arrays/dict/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
use std::fmt::Display;
use std::fmt::Formatter;

+ use num_traits::AsPrimitive;
use smallvec::smallvec;
use vortex_buffer::BitBuffer;
use vortex_error::VortexExpect;
Expand Down Expand Up @@ -159,28 +160,19 @@ pub trait DictArrayExt: TypedArrayRef<Dict> + DictArraySlotsExt {
match codes_validity.bit_buffer() {
AllOr::All => {
match_each_integer_ptype!(codes_primitive.ptype(), |P| {
- #[allow(
- clippy::cast_possible_truncation,
- clippy::cast_sign_loss,
- reason = "codes are non-negative indices; a negative signed code would wrap to a large usize and panic on the bounds-checked array index"
- )]
- for &idx in codes_primitive.as_slice::<P>() {
- values_vec[idx as usize] = referenced_value;
+ for idx in codes_primitive.as_slice::<P>() {
+ let idxu: usize = idx.as_();
+ values_vec[idxu] = referenced_value;
}
});
}
AllOr::None => {}
AllOr::Some(mask) => {
match_each_integer_ptype!(codes_primitive.ptype(), |P| {
let codes = codes_primitive.as_slice::<P>();
-
- #[allow(
- clippy::cast_possible_truncation,
- clippy::cast_sign_loss,
- reason = "codes are non-negative indices; a negative signed code would wrap to a large usize and panic on the bounds-checked array index"
- )]
mask.set_indices().for_each(|idx| {
- values_vec[codes[idx] as usize] = referenced_value;
+ let idxu: usize = codes[idx].as_();
+ values_vec[idxu] = referenced_value;
});
});
}
Expand Down
50 changes: 18 additions & 32 deletions vortex-array/src/arrays/dict/compute/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,15 @@ mod tests {
use std::sync::LazyLock;

use rstest::rstest;
+ use vortex_buffer::BitBuffer;
use vortex_buffer::buffer;
use vortex_session::VortexSession;

+ use crate::ArrayRef;
use crate::IntoArray;
- #[expect(deprecated)]
- use crate::ToCanonical as _;
use crate::VortexSessionExecute;
use crate::arrays::Dict;
+ use crate::arrays::DictArray;
use crate::arrays::PrimitiveArray;
use crate::arrays::dict::DictArraySlotsExt;
use crate::assert_arrays_eq;
Expand All @@ -66,13 +67,15 @@ mod tests {
use crate::dtype::DType;
use crate::dtype::Nullability;
use crate::dtype::PType;
+ use crate::validity::Validity;

static SESSION: LazyLock<VortexSession> = LazyLock::new(crate::array_session);

#[test]
fn test_cast_dict_to_wider_type() {
+ let ctx = &mut SESSION.create_execution_ctx();
let values = buffer![1i32, 2, 3, 2, 1].into_array();
- let dict = dict_encode(&values, &mut SESSION.create_execution_ctx()).unwrap();
+ let dict = dict_encode(&values, ctx).unwrap();

let casted = dict
.into_array()
Expand All @@ -83,13 +86,8 @@ mod tests {
&DType::Primitive(PType::I64, Nullability::NonNullable)
);

- #[expect(deprecated)]
- let decoded = casted.to_primitive();
- assert_arrays_eq!(
- decoded,
- PrimitiveArray::from_iter([1i64, 2, 3, 2, 1]),
- &mut SESSION.create_execution_ctx()
- );
+ let decoded = casted.into_array().execute::<PrimitiveArray>(ctx).unwrap();
+ assert_arrays_eq!(decoded, PrimitiveArray::from_iter([1i64, 2, 3, 2, 1]), ctx);
}

#[test]
Expand All @@ -110,9 +108,10 @@ mod tests {

#[test]
fn test_cast_dict_allvalid_to_nonnullable_and_back() {
+ let ctx = &mut SESSION.create_execution_ctx();
// Create an AllValid dict array (no nulls)
let values = buffer![10i32, 20, 30, 40].into_array();
- let dict = dict_encode(&values, &mut SESSION.create_execution_ctx()).unwrap();
+ let dict = dict_encode(&values, ctx).unwrap();

// Verify initial state - codes should be NonNullable, values should be NonNullable
assert_eq!(dict.codes().dtype().nullability(), Nullability::NonNullable);
Expand Down Expand Up @@ -173,37 +172,30 @@ mod tests {
);

// Verify values are unchanged
- #[expect(deprecated)]
- let original_values = dict.as_array().to_primitive();
- #[expect(deprecated)]
- let final_values = back_to_non_nullable.to_primitive();
- assert_arrays_eq!(
- original_values,
- final_values,
- &mut SESSION.create_execution_ctx()
- );
+ let original_values = dict.into_array().execute::<PrimitiveArray>(ctx).unwrap();
+
+ let final_values = back_to_non_nullable.execute::<PrimitiveArray>(ctx).unwrap();
+ assert_arrays_eq!(original_values, final_values, ctx);
}

#[rstest]
#[case(dict_encode(&buffer![1i32, 2, 3, 2, 1, 3].into_array(), &mut SESSION.create_execution_ctx()).unwrap().into_array())]
#[case(dict_encode(&buffer![100u32, 200, 100, 300, 200].into_array(), &mut SESSION.create_execution_ctx()).unwrap().into_array())]
#[case(dict_encode(&PrimitiveArray::from_option_iter([Some(1i32), None, Some(2), Some(1), None]).into_array(), &mut SESSION.create_execution_ctx()).unwrap().into_array())]
#[case(dict_encode(&buffer![1.5f32, 2.5, 1.5, 3.5].into_array(), &mut SESSION.create_execution_ctx()).unwrap().into_array())]
- fn test_cast_dict_conformance(#[case] array: crate::ArrayRef) {
+ fn test_cast_dict_conformance(#[case] array: ArrayRef) {
test_cast_conformance(&array);
}

#[test]
fn test_cast_dict_with_unreferenced_null_values_to_nonnullable() {
- use crate::arrays::DictArray;
- use crate::validity::Validity;
-
+ let ctx = &mut SESSION.create_execution_ctx();
// Create a dict with nullable values that have unreferenced null entries.
// Values: [1.0, null, 3.0] (index 1 is null but no code points to it)
// Codes: [0, 2, 0] (only reference indices 0 and 2, never 1)
let values = PrimitiveArray::new(
buffer![1.0f64, 0.0f64, 3.0f64],
- Validity::from(vortex_buffer::BitBuffer::from(vec![true, false, true])),
+ Validity::from(BitBuffer::from(vec![true, false, true])),
)
.into_array();
let codes = buffer![0u32, 2, 0].into_array();
Expand All @@ -228,12 +220,6 @@ mod tests {
casted.dtype(),
&DType::Primitive(PType::F64, Nullability::NonNullable)
);
- #[expect(deprecated)]
- let casted_prim = casted.to_primitive();
- assert_arrays_eq!(
- casted_prim,
- PrimitiveArray::from_iter([1.0f64, 3.0, 1.0]),
- &mut SESSION.create_execution_ctx()
- );
+ assert_arrays_eq!(casted, PrimitiveArray::from_iter([1.0f64, 3.0, 1.0]), ctx);
}
}
34 changes: 20 additions & 14 deletions vortex-array/src/arrays/dict/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,11 @@ impl FilterReduce for Dict {
mod test {
use std::sync::LazyLock;

- #[expect(unused_imports)]
- use itertools::Itertools;
use vortex_buffer::buffer;
use vortex_session::VortexSession;

use crate::ArrayRef;
use crate::IntoArray;
- #[expect(deprecated)]
- use crate::ToCanonical as _;
use crate::VortexSessionExecute;
use crate::accessor::ArrayAccessor;
use crate::arrays::ConstantArray;
Expand Down Expand Up @@ -101,8 +97,10 @@ mod test {
&mut SESSION.create_execution_ctx(),
)
.unwrap();
- #[expect(deprecated)]
- let actual = dict.as_array().to_primitive();
+ let actual = dict
+ .into_array()
+ .execute::<PrimitiveArray>(&mut SESSION.create_execution_ctx())
+ .unwrap();

let expected = PrimitiveArray::from_option_iter(values);

Expand All @@ -119,9 +117,12 @@ mod test {
&expected.clone().into_array(),
&mut SESSION.create_execution_ctx(),
)
- .unwrap();
- #[expect(deprecated)]
- let actual = dict.as_array().to_primitive();
+ .unwrap()
+ .into_array();
+
+ let actual = dict
+ .execute::<PrimitiveArray>(&mut SESSION.create_execution_ctx())
+ .unwrap();

assert_arrays_eq!(actual, expected, &mut ctx);
}
Expand All @@ -136,9 +137,12 @@ mod test {
&expected.clone().into_array(),
&mut SESSION.create_execution_ctx(),
)
- .unwrap();
- #[expect(deprecated)]
- let actual = dict.as_array().to_primitive();
+ .unwrap()
+ .into_array();
+
+ let actual = dict
+ .execute::<PrimitiveArray>(&mut SESSION.create_execution_ctx())
+ .unwrap();

assert_arrays_eq!(actual, expected, &mut ctx);
}
Expand All @@ -155,8 +159,10 @@ mod test {
&mut SESSION.create_execution_ctx(),
)
.unwrap();
- #[expect(deprecated)]
- let flattened_dict = dict.as_array().to_varbinview();
+ let flattened_dict = dict
+ .into_array()
+ .execute::<VarBinViewArray>(&mut SESSION.create_execution_ctx())
+ .unwrap();
assert_eq!(
flattened_dict.with_iterator(|iter| iter
.map(|slice| slice.map(|s| s.to_vec()))
Expand Down
Loading
Loading