Skip to content

Commit 4a90e13

Browse files
authored
Patches have correct dtype by construction instead of normalised during array construction (#8626)
Patches::cast_values was a crutch and sign that in too many places we were producing wrong dtypes. This pr removes this function and fixes up dtypes where necessary. We still need a cast in ALP-RD take as ALP-RD patches must be non nullable
1 parent 79c6b0f commit 4a90e13

19 files changed

Lines changed: 135 additions & 222 deletions

File tree

encodings/alp/benches/alp_compress.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,16 +146,16 @@ fn compress_rd<T: ALPRDFloat + NativePType>(bencher: Bencher, args: (usize, f64)
146146
let encoder = RDEncoder::new(primitive.as_slice::<T>());
147147

148148
bencher
149-
.with_inputs(|| (&primitive, &encoder, SESSION.create_execution_ctx()))
150-
.bench_refs(|(primitive, encoder, ctx)| encoder.encode(primitive.as_view(), ctx))
149+
.with_inputs(|| (&primitive, &encoder))
150+
.bench_refs(|(primitive, encoder)| encoder.encode(primitive.as_view()))
151151
}
152152

153153
#[divan::bench(types = [f32, f64], args = RD_BENCH_ARGS)]
154154
fn decompress_rd<T: ALPRDFloat + NativePType>(bencher: Bencher, args: (usize, f64)) {
155155
let (n, fraction_patch) = args;
156156
let primitive = make_rd_array::<T>(n, fraction_patch);
157157
let encoder = RDEncoder::new(primitive.as_slice::<T>());
158-
let encoded = encoder.encode(primitive.as_view(), &mut SESSION.create_execution_ctx());
158+
let encoded = encoder.encode(primitive.as_view());
159159

160160
bencher
161161
.with_inputs(|| (&encoded, SESSION.create_execution_ctx()))

encodings/alp/src/alp/compute/mask.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,13 @@ impl MaskKernel for ALP {
3636
) -> VortexResult<Option<ArrayRef>> {
3737
let vortex_mask = Validity::Array(mask.not()?).execute_mask(array.len(), ctx)?;
3838
let masked_encoded = array.encoded().clone().mask(mask.clone())?;
39-
let masked_dtype = array
40-
.dtype()
41-
.with_nullability(masked_encoded.dtype().nullability());
4239
let masked_patches = array
4340
.patches()
4441
.map(|p| p.mask(&vortex_mask, ctx))
4542
.transpose()?
46-
.flatten()
47-
.map(|patches| patches.cast_values(&masked_dtype))
48-
.transpose()?;
43+
.flatten();
4944
Ok(Some(
50-
ALP::new(masked_encoded, array.exponents(), masked_patches).into_array(),
45+
ALP::try_new(masked_encoded, array.exponents(), masked_patches)?.into_array(),
5146
))
5247
}
5348
}

encodings/alp/src/alp/compute/take.rs

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,9 @@ impl TakeExecute for ALP {
2323
.patches()
2424
.map(|p| p.take(indices, ctx))
2525
.transpose()?
26-
.flatten()
27-
.map(|patches| {
28-
patches.cast_values(
29-
&array
30-
.dtype()
31-
.with_nullability(taken_encoded.dtype().nullability()),
32-
)
33-
})
34-
.transpose()?;
26+
.flatten();
3527
Ok(Some(
36-
ALP::new(taken_encoded, array.exponents(), taken_patches).into_array(),
28+
ALP::try_new(taken_encoded, array.exponents(), taken_patches)?.into_array(),
3729
))
3830
}
3931
}

encodings/alp/src/alp_rd/array.rs

Lines changed: 9 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,11 @@ use vortex_array::ArrayParts;
1717
use vortex_array::ArrayRef;
1818
use vortex_array::ArraySlots;
1919
use vortex_array::ArrayView;
20-
use vortex_array::Canonical;
2120
use vortex_array::EqMode;
2221
use vortex_array::ExecutionCtx;
2322
use vortex_array::ExecutionResult;
2423
use vortex_array::IntoArray;
25-
use vortex_array::LEGACY_SESSION;
2624
use vortex_array::TypedArrayRef;
27-
use vortex_array::VortexSessionExecute;
2825
use vortex_array::arrays::Primitive;
2926
use vortex_array::arrays::PrimitiveArray;
3027
use vortex_array::buffer::BufferHandle;
@@ -217,13 +214,6 @@ impl VTable for ALPRD {
217214
)
218215
})
219216
.transpose()?;
220-
// NOTE: `VTable::deserialize` has a fixed trait signature without `ExecutionCtx`, so we
221-
// cannot plumb a ctx in here. We construct a legacy ctx locally at this trait boundary.
222-
let left_parts_patches = ALPRDData::canonicalize_patches(
223-
&left_parts,
224-
left_parts_patches,
225-
&mut LEGACY_SESSION.create_execution_ctx(),
226-
)?;
227217
let slots = ALPRDData::make_slots(&left_parts, &right_parts, left_parts_patches.as_ref());
228218
let data = ALPRDData::new(
229219
left_parts_dictionary,
@@ -376,11 +366,8 @@ impl ALPRD {
376366
right_parts: ArrayRef,
377367
right_bit_width: u8,
378368
left_parts_patches: Option<Patches>,
379-
ctx: &mut ExecutionCtx,
380369
) -> VortexResult<ALPRDArray> {
381370
let len = left_parts.len();
382-
let left_parts_patches =
383-
ALPRDData::canonicalize_patches(&left_parts, left_parts_patches, ctx)?;
384371
let slots = ALPRDData::make_slots(&left_parts, &right_parts, left_parts_patches.as_ref());
385372
let data = ALPRDData::new(left_parts_dictionary, right_bit_width, left_parts_patches);
386373
Array::try_from_parts(ArrayParts::new(ALPRD, dtype, len, data).with_slots(slots))
@@ -408,28 +395,6 @@ impl ALPRD {
408395
}
409396

410397
impl ALPRDData {
411-
fn canonicalize_patches(
412-
left_parts: &ArrayRef,
413-
left_parts_patches: Option<Patches>,
414-
ctx: &mut ExecutionCtx,
415-
) -> VortexResult<Option<Patches>> {
416-
left_parts_patches
417-
.map(|patches| {
418-
if !patches.values().all_valid(ctx)? {
419-
vortex_bail!("patches must be all valid: {}", patches.values());
420-
}
421-
// TODO(ngates): assert the DType, don't cast it.
422-
// TODO(joe): assert the DType, don't cast it in the next PR.
423-
let mut patches = patches.cast_values(&left_parts.dtype().as_nonnullable())?;
424-
// Force execution of the lazy cast so patch values are materialized
425-
// before serialization.
426-
let canonical = patches.values().clone().execute::<Canonical>(ctx)?;
427-
*patches.values_mut() = canonical.into_array();
428-
Ok(patches)
429-
})
430-
.transpose()
431-
}
432-
433398
/// Build a new `ALPRDArray` from components.
434399
pub fn new(
435400
left_parts_dictionary: Buffer<u16>,
@@ -556,18 +521,16 @@ fn validate_parts(
556521
"patches array_len {} != outer len {len}",
557522
patches.array_len(),
558523
);
524+
// Left-parts exceptions are always all-valid and are stored as the non-nullable left-parts
525+
// dtype. Requiring that exact dtype (rather than ignoring nullability) means each
526+
// construction path must produce correct patches, removing the need to normalize them.
527+
// Non-nullable also implies all-valid, so no separate validity check is required.
528+
let expected = left_parts.dtype().as_nonnullable();
559529
vortex_ensure!(
560-
patches.dtype().eq_ignore_nullability(left_parts.dtype()),
561-
"patches dtype {} does not match left_parts dtype {}",
530+
patches.dtype() == &expected,
531+
"patches dtype {} must be the non-nullable left_parts dtype {}",
562532
patches.dtype(),
563-
left_parts.dtype(),
564-
);
565-
vortex_ensure!(
566-
patches
567-
.values()
568-
.all_valid(&mut LEGACY_SESSION.create_execution_ctx())?,
569-
"patches must be all valid: {}",
570-
patches.values()
533+
expected,
571534
);
572535
}
573536

@@ -672,7 +635,7 @@ mod test {
672635
// Pick a seed that we know will trigger lots of patches.
673636
let encoder: alp_rd::RDEncoder = alp_rd::RDEncoder::new(&[seed.powi(-2)]);
674637

675-
let rd_array = encoder.encode(real_array.as_view(), &mut ctx);
638+
let rd_array = encoder.encode(real_array.as_view());
676639

677640
let decoded = rd_array
678641
.as_array()

encodings/alp/src/alp_rd/compute/cast.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ mod tests {
6767
let values = vec![1.0f32, 1.1, 1.2, 1.3, 1.4];
6868
let arr = PrimitiveArray::from_iter(values.clone());
6969
let encoder = RDEncoder::new(&values);
70-
let alprd = encoder.encode(arr.as_view(), &mut ctx);
70+
let alprd = encoder.encode(arr.as_view());
7171

7272
let casted = alprd
7373
.into_array()
@@ -92,7 +92,7 @@ mod tests {
9292
PrimitiveArray::from_option_iter([Some(10.0f64), None, Some(10.1), Some(10.2), None]);
9393
let values = vec![10.0f64, 10.1, 10.2];
9494
let encoder = RDEncoder::new(&values);
95-
let alprd = encoder.encode(arr.as_view(), &mut ctx);
95+
let alprd = encoder.encode(arr.as_view());
9696

9797
// Cast to NonNullable should fail since we have nulls. The failure surfaces during
9898
// execution since the reduce path defers when the validity stat is not cached.
@@ -122,31 +122,31 @@ mod tests {
122122
let values = vec![1.23f32, 4.56, 7.89, 10.11, 12.13];
123123
let arr = PrimitiveArray::from_iter(values.clone());
124124
let encoder = RDEncoder::new(&values);
125-
encoder.encode(arr.as_view(), &mut array_session().create_execution_ctx())
125+
encoder.encode(arr.as_view())
126126
})]
127127
#[case::f64({
128128
let values = vec![100.1f64, 200.2, 300.3, 400.4, 500.5];
129129
let arr = PrimitiveArray::from_iter(values.clone());
130130
let encoder = RDEncoder::new(&values);
131-
encoder.encode(arr.as_view(), &mut array_session().create_execution_ctx())
131+
encoder.encode(arr.as_view())
132132
})]
133133
#[case::single({
134134
let values = vec![42.42f64];
135135
let arr = PrimitiveArray::from_iter(values.clone());
136136
let encoder = RDEncoder::new(&values);
137-
encoder.encode(arr.as_view(), &mut array_session().create_execution_ctx())
137+
encoder.encode(arr.as_view())
138138
})]
139139
#[case::negative({
140140
let values = vec![0.0f32, -1.5, 2.5, -3.5, 4.5];
141141
let arr = PrimitiveArray::from_iter(values.clone());
142142
let encoder = RDEncoder::new(&values);
143-
encoder.encode(arr.as_view(), &mut array_session().create_execution_ctx())
143+
encoder.encode(arr.as_view())
144144
})]
145145
#[case::nullable({
146146
let arr = PrimitiveArray::from_option_iter([Some(1.1f32), None, Some(2.2), Some(3.3), None]);
147147
let values = vec![1.1f32, 2.2, 3.3];
148148
let encoder = RDEncoder::new(&values);
149-
encoder.encode(arr.as_view(), &mut array_session().create_execution_ctx())
149+
encoder.encode(arr.as_view())
150150
})]
151151
fn test_cast_alprd_conformance(#[case] alprd: crate::alp_rd::ALPRDArray) {
152152
test_cast_conformance(&alprd.into_array());

encodings/alp/src/alp_rd/compute/filter.rs

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ impl FilterKernel for ALPRD {
3232
array.right_parts().filter(mask.clone())?,
3333
array.right_bit_width(),
3434
left_parts_exceptions,
35-
ctx,
3635
)?
3736
.into_array(),
3837
))
@@ -71,7 +70,7 @@ mod test {
7170
fn test_filter<T: ALPRDFloat>(#[case] a: T, #[case] b: T, #[case] outlier: T) {
7271
let mut ctx = SESSION.create_execution_ctx();
7372
let array = PrimitiveArray::new(buffer![a, b, outlier], Validity::NonNullable);
74-
let encoded = RDEncoder::new(&[a, b]).encode(array.as_view(), &mut ctx);
73+
let encoded = RDEncoder::new(&[a, b]).encode(array.as_view());
7574

7675
// Make sure that we're testing the exception pathway.
7776
assert!(encoded.left_parts_patches().is_some());
@@ -87,13 +86,9 @@ mod test {
8786
#[case(0.1f32, 0.2f32, 3e25f32)]
8887
#[case(0.1f64, 0.2f64, 3e100f64)]
8988
fn test_filter_simple<T: ALPRDFloat>(#[case] a: T, #[case] b: T, #[case] outlier: T) {
90-
let mut ctx = SESSION.create_execution_ctx();
9189
test_filter_conformance(
9290
&RDEncoder::new(&[a, b])
93-
.encode(
94-
PrimitiveArray::from_iter([a, b, outlier, b, outlier]).as_view(),
95-
&mut ctx,
96-
)
91+
.encode(PrimitiveArray::from_iter([a, b, outlier, b, outlier]).as_view())
9792
.into_array(),
9893
);
9994
}
@@ -102,13 +97,11 @@ mod test {
10297
#[case(0.1f32, 3e25f32)]
10398
#[case(0.5f64, 1e100f64)]
10499
fn test_filter_with_nulls<T: ALPRDFloat>(#[case] a: T, #[case] outlier: T) {
105-
let mut ctx = SESSION.create_execution_ctx();
106100
test_filter_conformance(
107101
&RDEncoder::new(&[a])
108102
.encode(
109103
PrimitiveArray::from_option_iter([Some(a), None, Some(outlier), Some(a), None])
110104
.as_view(),
111-
&mut ctx,
112105
)
113106
.into_array(),
114107
);

encodings/alp/src/alp_rd/compute/mask.rs

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
use vortex_array::ArrayRef;
55
use vortex_array::ArrayView;
66
use vortex_array::IntoArray;
7-
use vortex_array::LEGACY_SESSION;
8-
use vortex_array::VortexSessionExecute;
97
use vortex_array::arrays::scalar_fn::ScalarFnFactoryExt;
108
use vortex_array::scalar_fn::EmptyOptions;
119
use vortex_array::scalar_fn::fns::mask::Mask as MaskExpr;
@@ -22,8 +20,6 @@ impl MaskReduce for ALPRD {
2220
EmptyOptions,
2321
[array.left_parts().clone(), mask.clone()],
2422
)?;
25-
// NOTE: `MaskReduce::mask` has a fixed trait signature without `ExecutionCtx`, so we
26-
// construct a legacy ctx locally at this trait boundary.
2723
Ok(Some(
2824
ALPRD::try_new(
2925
array.dtype().as_nullable(),
@@ -32,7 +28,6 @@ impl MaskReduce for ALPRD {
3228
array.right_parts().clone(),
3329
array.right_bit_width(),
3430
array.left_parts_patches(),
35-
&mut LEGACY_SESSION.create_execution_ctx(),
3631
)?
3732
.into_array(),
3833
))
@@ -43,8 +38,6 @@ impl MaskReduce for ALPRD {
4338
mod tests {
4439
use rstest::rstest;
4540
use vortex_array::IntoArray;
46-
use vortex_array::VortexSessionExecute;
47-
use vortex_array::array_session;
4841
use vortex_array::arrays::PrimitiveArray;
4942
use vortex_array::compute::conformance::mask::test_mask_conformance;
5043

@@ -55,13 +48,9 @@ mod tests {
5548
#[case(0.1f32, 0.2f32, 3e25f32)]
5649
#[case(0.1f64, 0.2f64, 3e100f64)]
5750
fn test_mask_simple<T: ALPRDFloat>(#[case] a: T, #[case] b: T, #[case] outlier: T) {
58-
let mut ctx = array_session().create_execution_ctx();
5951
test_mask_conformance(
6052
&RDEncoder::new(&[a, b])
61-
.encode(
62-
PrimitiveArray::from_iter([a, b, outlier, b, outlier]).as_view(),
63-
&mut ctx,
64-
)
53+
.encode(PrimitiveArray::from_iter([a, b, outlier, b, outlier]).as_view())
6554
.into_array(),
6655
);
6756
}
@@ -70,13 +59,11 @@ mod tests {
7059
#[case(0.1f32, 3e25f32)]
7160
#[case(0.5f64, 1e100f64)]
7261
fn test_mask_with_nulls<T: ALPRDFloat>(#[case] a: T, #[case] outlier: T) {
73-
let mut ctx = array_session().create_execution_ctx();
7462
test_mask_conformance(
7563
&RDEncoder::new(&[a])
7664
.encode(
7765
PrimitiveArray::from_option_iter([Some(a), None, Some(outlier), Some(a), None])
7866
.as_view(),
79-
&mut ctx,
8067
)
8168
.into_array(),
8269
);

0 commit comments

Comments
 (0)