Skip to content

Commit e5affee

Browse files
authored
Merge branch 'develop' into adamg/better-sort-pushdown
2 parents f2811a7 + a9a3c27 commit e5affee

4 files changed

Lines changed: 45 additions & 57 deletions

File tree

vortex-file/src/strategy.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
//! This module defines the default layout strategy for a Vortex file.
55
6+
use std::num::NonZeroUsize;
67
use std::sync::Arc;
78
use std::sync::LazyLock;
89

@@ -35,6 +36,7 @@ use vortex_btrblocks::schemes::integer::IntDictScheme;
3536
use vortex_bytebool::ByteBool;
3637
use vortex_datetime_parts::DateTimeParts;
3738
use vortex_decimal_byte_parts::DecimalByteParts;
39+
use vortex_error::VortexExpect;
3840
use vortex_fastlanes::BitPacked;
3941
use vortex_fastlanes::Delta;
4042
use vortex_fastlanes::FoR;
@@ -297,7 +299,7 @@ impl WriteStrategyBuilder {
297299
dict,
298300
compress_then_flat.clone(),
299301
ZonedLayoutOptions {
300-
block_size: self.row_block_size,
302+
block_size: NonZeroUsize::new(self.row_block_size).vortex_expect("must be non 0"),
301303
..Default::default()
302304
},
303305
);

vortex-layout/src/layouts/zoned/mod.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ mod schema;
1818
pub mod writer;
1919
pub mod zone_map;
2020

21+
use std::num::NonZeroUsize;
2122
use std::sync::Arc;
2223

2324
pub(crate) use builder::AggregateStatsAccumulator;
@@ -319,11 +320,9 @@ impl ZonedLayout {
319320
pub fn try_new(
320321
data: LayoutRef,
321322
zones: LayoutRef,
322-
zone_len: usize,
323+
zone_len: NonZeroUsize,
323324
aggregate_fns: Arc<[AggregateFnRef]>,
324325
) -> VortexResult<Self> {
325-
vortex_ensure!(zone_len > 0, "Zone length must be greater than 0");
326-
327326
let expected_dtype = aggregate_stats_table_dtype(data.dtype(), &aggregate_fns);
328327
if zones.dtype() != &expected_dtype {
329328
vortex_bail!("Invalid zone map layout: zones dtype does not match expected dtype");
@@ -333,7 +332,7 @@ impl ZonedLayout {
333332
Ok(Self {
334333
dtype: data.dtype().clone(),
335334
children: OwnedLayoutChildren::layout_children(vec![data, zones]),
336-
zone_len,
335+
zone_len: zone_len.get(),
337336
zone_map_schema: ZoneMapSchema::AggregateFns(aggregate_fns),
338337
stats_table_dtype: expected_dtype,
339338
})
@@ -540,7 +539,7 @@ mod tests {
540539
fn test_metadata_serialization_preserves_aggregate_options() -> VortexResult<()> {
541540
let aggregate_fn = BoundedMax.bind(BoundedMaxOptions {
542541
// SAFETY: 128 is non-zero.
543-
max_bytes: unsafe { std::num::NonZeroUsize::new_unchecked(128) },
542+
max_bytes: unsafe { NonZeroUsize::new_unchecked(128) },
544543
});
545544
let metadata = ZonedMetadata {
546545
zone_len: 314,

vortex-layout/src/layouts/zoned/reader.rs

Lines changed: 4 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ impl LayoutReader for ZonedReader {
219219

220220
#[cfg(test)]
221221
mod test {
222+
use std::num::NonZeroUsize;
222223
use std::sync::Arc;
223224

224225
use rstest::fixture;
@@ -237,6 +238,7 @@ mod test {
237238
use vortex_array::expr::root;
238239
use vortex_array::validity::Validity;
239240
use vortex_buffer::buffer;
241+
use vortex_error::VortexExpect;
240242
use vortex_error::VortexResult;
241243
use vortex_io::runtime::Handle;
242244
use vortex_io::runtime::single::block_on;
@@ -283,7 +285,7 @@ mod test {
283285
ChunkedLayoutStrategy::new(FlatLayoutStrategy::default()),
284286
FlatLayoutStrategy::default(),
285287
ZonedLayoutOptions {
286-
block_size: 3,
288+
block_size: NonZeroUsize::new(3).vortex_expect("non zero"),
287289
..Default::default()
288290
},
289291
);
@@ -370,7 +372,7 @@ mod test {
370372
ChunkedLayoutStrategy::new(FlatLayoutStrategy::default()),
371373
FlatLayoutStrategy::default(),
372374
ZonedLayoutOptions {
373-
block_size: 3,
375+
block_size: NonZeroUsize::new(3).vortex_expect("non zero"),
374376
..Default::default()
375377
},
376378
);
@@ -470,33 +472,4 @@ mod test {
470472
Ok(())
471473
})
472474
}
473-
474-
#[test]
475-
fn test_writer_rejects_zero_block_size() {
476-
let ctx = ArrayContext::empty();
477-
let segments = Arc::new(TestSegments::default());
478-
let (ptr, eof) = SequenceId::root().split();
479-
let strategy = ZonedStrategy::new(
480-
ChunkedLayoutStrategy::new(FlatLayoutStrategy::default()),
481-
FlatLayoutStrategy::default(),
482-
ZonedLayoutOptions {
483-
block_size: 0,
484-
..Default::default()
485-
},
486-
);
487-
let array_stream = ChunkedArray::from_iter([buffer![1, 2, 3].into_array()])
488-
.into_array()
489-
.to_array_stream()
490-
.sequenced(ptr);
491-
let segments2 = Arc::<TestSegments>::clone(&segments);
492-
493-
let result = block_on(|handle| async move {
494-
let session = session_with_handle(handle);
495-
strategy
496-
.write_stream(ctx, segments2, array_stream, eof, &session)
497-
.await
498-
});
499-
500-
assert!(result.is_err());
501-
}
502475
}

vortex-layout/src/layouts/zoned/writer.rs

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// SPDX-License-Identifier: Apache-2.0
44
// SPDX-FileCopyrightText: Copyright the Vortex contributors
55

6+
use std::num::NonZeroUsize;
67
use std::sync::Arc;
78

89
use async_trait::async_trait;
@@ -26,9 +27,11 @@ use vortex_array::aggregate_fn::fns::nan_count::NanCount;
2627
use vortex_array::aggregate_fn::fns::null_count::NullCount;
2728
use vortex_array::aggregate_fn::fns::sum::Sum;
2829
use vortex_array::dtype::DType;
30+
use vortex_error::VortexError;
2931
use vortex_error::VortexResult;
30-
use vortex_error::vortex_ensure;
32+
use vortex_io::session::RuntimeSessionExt;
3133
use vortex_session::VortexSession;
34+
use vortex_utils::parallelism::get_available_parallelism;
3235

3336
use crate::IntoLayout;
3437
use crate::LayoutRef;
@@ -50,18 +53,23 @@ use crate::sequence::SequentialStreamExt;
5053
/// possibly the final partial zone.
5154
pub struct ZonedLayoutOptions {
5255
/// The size of a statistics block
53-
pub block_size: usize,
56+
pub block_size: NonZeroUsize,
5457
/// The aggregate partials to collect for each block.
5558
///
5659
/// If unset, the writer chooses pruning aggregates from the input dtype.
5760
pub aggregate_fns: Option<Arc<[AggregateFnRef]>>,
61+
/// Number of chunks to compute aggregate partials in parallel.
62+
pub concurrency: NonZeroUsize,
5863
}
5964

6065
impl Default for ZonedLayoutOptions {
6166
fn default() -> Self {
6267
Self {
63-
block_size: 8192,
68+
block_size: unsafe { NonZeroUsize::new_unchecked(8192) },
6469
aggregate_fns: None,
70+
concurrency: unsafe {
71+
NonZeroUsize::new_unchecked(get_available_parallelism().unwrap_or(1))
72+
},
6573
}
6674
}
6775
}
@@ -97,38 +105,44 @@ impl LayoutStrategy for ZonedStrategy {
97105
mut eof: SequencePointer,
98106
session: &VortexSession,
99107
) -> VortexResult<LayoutRef> {
100-
vortex_ensure!(
101-
self.options.block_size > 0,
102-
"ZonedStrategy requires block_size > 0 when writing"
103-
);
104-
105108
let aggregate_fns = self
106109
.options
107110
.aggregate_fns
108111
.clone()
109112
.unwrap_or_else(|| default_zoned_aggregate_fns(stream.dtype()));
110-
let session = session.clone();
113+
let compute_session = session.clone();
111114

112115
let stats_accumulator = Arc::new(Mutex::new(AggregateStatsAccumulator::new(
113116
stream.dtype(),
114117
&aggregate_fns,
115118
)));
116119
let aggregate_fns = stats_accumulator.lock().aggregate_fns();
117120

121+
let stream_dtype = stream.dtype().clone();
122+
let concurrency = self.options.concurrency.get();
123+
let stream = stream
124+
.map(move |item| {
125+
let aggregate_fns = Arc::clone(&aggregate_fns);
126+
let session = compute_session.clone();
127+
session.handle().spawn_cpu(move || {
128+
let (sequence_id, chunk) = item?;
129+
let partials = aggregate_partials(
130+
&chunk,
131+
&aggregate_fns,
132+
&mut session.create_execution_ctx(),
133+
)?;
134+
Ok::<_, VortexError>((sequence_id, chunk, partials))
135+
})
136+
})
137+
.buffered(concurrency);
138+
118139
// Accumulate zone stats in stream order so the auxiliary table stays aligned with the
119140
// data child.
120141
let stats_accumulator2 = Arc::clone(&stats_accumulator);
121-
let aggregate_fns2 = Arc::clone(&aggregate_fns);
122-
let compute_session = session.clone();
123142
let stream = SequentialStreamAdapter::new(
124-
stream.dtype().clone(),
143+
stream_dtype,
125144
stream.map(move |item| {
126-
let (sequence_id, chunk) = item?;
127-
let partials = aggregate_partials(
128-
&chunk,
129-
&aggregate_fns2,
130-
&mut compute_session.create_execution_ctx(),
131-
)?;
145+
let (sequence_id, chunk, partials) = item?;
132146
stats_accumulator2.lock().push_partials(partials)?;
133147
Ok((sequence_id, chunk))
134148
}),
@@ -146,7 +160,7 @@ impl LayoutStrategy for ZonedStrategy {
146160
Arc::clone(&segment_sink),
147161
stream,
148162
data_eof,
149-
&session,
163+
session,
150164
)
151165
.await?;
152166

@@ -164,7 +178,7 @@ impl LayoutStrategy for ZonedStrategy {
164178
.sequenced(eof.split_off());
165179
let zones_layout = self
166180
.stats
167-
.write_stream(ctx, Arc::clone(&segment_sink), stats_stream, eof, &session)
181+
.write_stream(ctx, Arc::clone(&segment_sink), stats_stream, eof, session)
168182
.await?;
169183

170184
Ok(

0 commit comments

Comments
 (0)