Skip to content

Commit 08472bb

Browse files
robert3005claude
andcommitted
Dispatch list appends through dyn ArrayBuilder methods
Replace the ListBuilder/ListViewBuilder downcast-enumeration macros with two dyn-dispatched ArrayBuilder methods, append_list_array and append_listview_array, taking ArrayViews. The builders resolve their own offset/size generics, so every IntegerPType (signed included) works, and either list encoding can append into either list builder. List-encoded sources now append directly from the ListArray layout without canonicalizing to ListViewArray first. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com> Signed-off-by: Robert Kruszewski <github@robertk.io>
1 parent de99886 commit 08472bb

5 files changed

Lines changed: 334 additions & 226 deletions

File tree

vortex-array/src/arrays/list/vtable/mod.rs

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ use vortex_session::registry::CachedId;
1616
use crate::ArrayEq;
1717
use crate::ArrayHash;
1818
use crate::ArrayRef;
19-
use crate::Canonical;
2019
use crate::EqMode;
2120
use crate::ExecutionCtx;
2221
use crate::ExecutionResult;
@@ -37,7 +36,6 @@ use crate::arrays::list::compute::rules::PARENT_RULES;
3736
use crate::arrays::listview::list_view_from_list;
3837
use crate::buffer::BufferHandle;
3938
use crate::builders::ArrayBuilder;
40-
use crate::builders::match_each_list_builder;
4139
use crate::dtype::DType;
4240
use crate::dtype::Nullability;
4341
use crate::dtype::PType;
@@ -208,14 +206,7 @@ impl VTable for List {
208206
builder: &mut dyn ArrayBuilder,
209207
ctx: &mut ExecutionCtx,
210208
) -> VortexResult<()> {
211-
let array = array.into_owned().into_array();
212-
// A directly-supplied `ListBuilder` (not the canonical builder for `DType::List`).
213-
match_each_list_builder!(&array, builder, ctx);
214-
// The canonical builder is `ListViewBuilder`; canonicalize and dispatch to `ListView`.
215-
array
216-
.execute::<Canonical>(ctx)?
217-
.into_array()
218-
.append_to_builder(builder, ctx)
209+
builder.append_list_array(array, ctx)
219210
}
220211
}
221212

vortex-array/src/arrays/listview/vtable/mod.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ use crate::ArrayRef;
2121
use crate::EqMode;
2222
use crate::ExecutionCtx;
2323
use crate::ExecutionResult;
24-
use crate::IntoArray;
2524
use crate::array::Array;
2625
use crate::array::ArrayId;
2726
use crate::array::ArrayView;
@@ -37,7 +36,6 @@ use crate::arrays::listview::array::SLOT_NAMES;
3736
use crate::arrays::listview::compute::rules::PARENT_RULES;
3837
use crate::buffer::BufferHandle;
3938
use crate::builders::ArrayBuilder;
40-
use crate::builders::match_each_listview_builder;
4139
use crate::dtype::DType;
4240
use crate::dtype::Nullability;
4341
use crate::dtype::PType;
@@ -233,12 +231,7 @@ impl VTable for ListView {
233231
builder: &mut dyn ArrayBuilder,
234232
ctx: &mut ExecutionCtx,
235233
) -> VortexResult<()> {
236-
let array = array.into_owned().into_array();
237-
match_each_listview_builder!(&array, builder, ctx);
238-
vortex_bail!(
239-
"append_to_builder for ListView requires a ListViewBuilder, got a builder for {}",
240-
builder.dtype()
241-
)
234+
builder.append_listview_array(array, ctx)
242235
}
243236

244237
fn reduce_parent(

vortex-array/src/builders/list.rs

Lines changed: 142 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
use std::any::Any;
55
use std::sync::Arc;
66

7+
use num_traits::AsPrimitive;
78
use vortex_error::VortexExpect;
89
use vortex_error::VortexResult;
910
use vortex_error::vortex_bail;
@@ -15,9 +16,13 @@ use crate::ArrayRef;
1516
use crate::Canonical;
1617
use crate::ExecutionCtx;
1718
use crate::IntoArray;
19+
use crate::array::ArrayView;
20+
use crate::arrays::List;
1821
use crate::arrays::ListArray;
22+
use crate::arrays::ListView;
1923
use crate::arrays::ListViewArray;
2024
use crate::arrays::PrimitiveArray;
25+
use crate::arrays::list::ListArrayExt;
2126
use crate::arrays::listview::ListViewArrayExt;
2227
use crate::builders::ArrayBuilder;
2328
use crate::builders::DEFAULT_BUILDER_CAPACITY;
@@ -168,90 +173,51 @@ impl<O: IntegerPType> ListBuilder<O> {
168173

169174
element_dtype
170175
}
176+
}
171177

172-
/// Appends the values of a list-typed `array` to the builder, canonicalizing to a
173-
/// [`ListViewArray`] and converting into the `ListArray` (`n + 1` offsets) layout.
174-
///
175-
/// [`ListBuilder`] is not the canonical builder for [`DType::List`] (that is
176-
/// [`ListViewBuilder`](crate::builders::ListViewBuilder)), so it is never produced by
177-
/// [`builder_with_capacity`]. List encodings still dispatch into it via `match_each_list_builder`
178-
/// when a caller supplies one directly.
179-
pub fn append_list_array(
180-
&mut self,
181-
array: &ArrayRef,
182-
ctx: &mut ExecutionCtx,
183-
) -> VortexResult<()> {
184-
let list = array.clone().execute::<ListViewArray>(ctx)?;
185-
if list.is_empty() {
186-
return Ok(());
178+
/// Appends `ListViewArray`-layout lists (`n` offsets and sizes) into a [`ListBuilder`], converting
179+
/// into the `ListArray` (`n + 1` offsets) layout.
180+
fn extend_from_listview<O, OffsetType, SizeType>(
181+
builder: &mut ListBuilder<O>,
182+
new_elements: &ArrayRef,
183+
new_offsets: &[OffsetType],
184+
new_sizes: &[SizeType],
185+
ctx: &mut ExecutionCtx,
186+
) -> VortexResult<()>
187+
where
188+
O: IntegerPType,
189+
OffsetType: IntegerPType,
190+
SizeType: IntegerPType,
191+
{
192+
let num_lists = new_offsets.len();
193+
debug_assert_eq!(num_lists, new_sizes.len());
194+
195+
let mut curr_offset = builder.elements_builder.len();
196+
let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
197+
198+
// We need to append each list individually, converting from `ListViewArray` format to
199+
// the `ListArray` format that `ListBuilder` expects.
200+
for i in 0..new_offsets.len() {
201+
let offset: usize = new_offsets[i].as_();
202+
let size: usize = new_sizes[i].as_();
203+
204+
if size > 0 {
205+
let list_elements = new_elements
206+
.slice(offset..offset + size)
207+
.vortex_expect("list builder slice");
208+
list_elements.append_to_builder(builder.elements_builder.as_mut(), ctx)?;
209+
curr_offset += size;
187210
}
188211

189-
// Append validity information.
190-
self.nulls
191-
.append_validity_mask(&list.validity()?.execute_mask(list.len(), ctx)?);
192-
193-
// Note that `ListViewArray` has `n` offsets and sizes, not `n+1` offsets like `ListArray`.
194-
let elements = list.elements();
195-
let offsets = list.offsets().clone().execute::<PrimitiveArray>(ctx)?;
196-
let sizes = list.sizes().clone().execute::<PrimitiveArray>(ctx)?;
197-
198-
fn extend_inner<O, OffsetType, SizeType>(
199-
builder: &mut ListBuilder<O>,
200-
new_elements: &ArrayRef,
201-
new_offsets: &[OffsetType],
202-
new_sizes: &[SizeType],
203-
ctx: &mut ExecutionCtx,
204-
) -> VortexResult<()>
205-
where
206-
O: IntegerPType,
207-
OffsetType: IntegerPType,
208-
SizeType: IntegerPType,
209-
{
210-
let num_lists = new_offsets.len();
211-
debug_assert_eq!(num_lists, new_sizes.len());
212-
213-
let mut curr_offset = builder.elements_builder.len();
214-
let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
215-
216-
// We need to append each list individually, converting from `ListViewArray` format to
217-
// the `ListArray` format that `ListBuilder` expects.
218-
for i in 0..new_offsets.len() {
219-
let offset: usize = new_offsets[i].as_();
220-
let size: usize = new_sizes[i].as_();
221-
222-
if size > 0 {
223-
let list_elements = new_elements
224-
.slice(offset..offset + size)
225-
.vortex_expect("list builder slice");
226-
list_elements.append_to_builder(builder.elements_builder.as_mut(), ctx)?;
227-
curr_offset += size;
228-
}
229-
230-
let new_offset =
231-
O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
232-
233-
offsets_range.set_value(i, new_offset);
234-
}
235-
236-
// SAFETY: We have initialized all `num_lists` values, and since the `offsets` array is
237-
// non-nullable, we are done.
238-
unsafe { offsets_range.finish() };
239-
Ok(())
240-
}
212+
let new_offset = O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
241213

242-
match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
243-
match_each_integer_ptype!(sizes.ptype(), |SizeType| {
244-
extend_inner(
245-
self,
246-
elements,
247-
offsets.as_slice::<OffsetType>(),
248-
sizes.as_slice::<SizeType>(),
249-
ctx,
250-
)?
251-
})
252-
});
253-
Ok(())
214+
offsets_range.set_value(i, new_offset);
254215
}
216+
217+
// SAFETY: We have initialized all `num_lists` values, and since the `offsets` array is
218+
// non-nullable, we are done.
219+
unsafe { offsets_range.finish() };
220+
Ok(())
255221
}
256222

257223
impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
@@ -325,6 +291,82 @@ impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
325291
.vortex_expect("list builder should canonicalize to listview");
326292
Canonical::List(listview)
327293
}
294+
295+
fn append_list_array(
296+
&mut self,
297+
array: ArrayView<'_, List>,
298+
ctx: &mut ExecutionCtx,
299+
) -> VortexResult<()> {
300+
if array.is_empty() {
301+
return Ok(());
302+
}
303+
304+
self.nulls
305+
.append_validity_mask(&array.validity()?.execute_mask(array.len(), ctx)?);
306+
307+
let num_lists = array.len();
308+
let offsets = array.offsets().clone().execute::<PrimitiveArray>(ctx)?;
309+
match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
310+
let offsets = offsets.as_slice::<OffsetType>();
311+
let first: usize = offsets[0].as_();
312+
let last: usize = offsets[num_lists].as_();
313+
314+
// Lists in a `ListArray` are contiguous, so the referenced elements can be appended
315+
// in bulk and the offsets rebased onto this builder's elements.
316+
let elements_base = self.elements_builder.len();
317+
if last > first {
318+
array
319+
.elements()
320+
.slice(first..last)?
321+
.append_to_builder(self.elements_builder.as_mut(), ctx)?;
322+
}
323+
324+
let mut offsets_range = self.offsets_builder.uninit_range(num_lists);
325+
for i in 0..num_lists {
326+
let end: usize = offsets[i + 1].as_();
327+
offsets_range.set_value(
328+
i,
329+
O::from_usize(end - first + elements_base)
330+
.vortex_expect("Failed to convert offset"),
331+
);
332+
}
333+
// SAFETY: We have initialized all `num_lists` values, and since the `offsets` array is
334+
// non-nullable, we are done.
335+
unsafe { offsets_range.finish() };
336+
});
337+
Ok(())
338+
}
339+
340+
fn append_listview_array(
341+
&mut self,
342+
array: ArrayView<'_, ListView>,
343+
ctx: &mut ExecutionCtx,
344+
) -> VortexResult<()> {
345+
if array.is_empty() {
346+
return Ok(());
347+
}
348+
349+
self.nulls
350+
.append_validity_mask(&array.validity()?.execute_mask(array.len(), ctx)?);
351+
352+
// Note that `ListViewArray` has `n` offsets and sizes, not `n+1` offsets like `ListArray`.
353+
let elements = array.elements();
354+
let offsets = array.offsets().clone().execute::<PrimitiveArray>(ctx)?;
355+
let sizes = array.sizes().clone().execute::<PrimitiveArray>(ctx)?;
356+
357+
match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
358+
match_each_integer_ptype!(sizes.ptype(), |SizeType| {
359+
extend_from_listview(
360+
self,
361+
elements,
362+
offsets.as_slice::<OffsetType>(),
363+
sizes.as_slice::<SizeType>(),
364+
ctx,
365+
)?
366+
})
367+
});
368+
Ok(())
369+
}
328370
}
329371

330372
#[cfg(test)]
@@ -470,13 +512,15 @@ mod tests {
470512
let mut ctx = array_session().create_execution_ctx();
471513

472514
let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
473-
builder.append_list_array(&list, &mut ctx).unwrap();
474-
builder.append_list_array(&list, &mut ctx).unwrap();
475-
builder
476-
.append_list_array(&list.slice(0..0).unwrap(), &mut ctx)
515+
list.append_to_builder(&mut builder, &mut ctx).unwrap();
516+
list.append_to_builder(&mut builder, &mut ctx).unwrap();
517+
list.slice(0..0)
518+
.unwrap()
519+
.append_to_builder(&mut builder, &mut ctx)
477520
.unwrap();
478-
builder
479-
.append_list_array(&list.slice(1..3).unwrap(), &mut ctx)
521+
list.slice(1..3)
522+
.unwrap()
523+
.append_to_builder(&mut builder, &mut ctx)
480524
.unwrap();
481525

482526
let expected = ListArray::from_iter_opt_slow::<O, _, _>(
@@ -543,22 +587,32 @@ mod tests {
543587
list.append_to_builder(listview_builder.as_mut(), &mut ctx)?;
544588
assert_arrays_eq!(listview_builder.finish(), list, &mut ctx);
545589

546-
// Offset/size types are enumerated, not assumed: a `ListViewBuilder` with non-`u64`
547-
// offset and size types must work for both source encodings.
590+
// A `ListViewBuilder` with non-`u64` (including signed) offset and size types must work
591+
// for both source encodings.
548592
let mut lv_u32_u8 = ListViewBuilder::<u32, u8>::with_capacity(elem_dtype(), Nullable, 8, 4);
549593
list.append_to_builder(&mut lv_u32_u8, &mut ctx)?;
550594
assert_arrays_eq!(lv_u32_u8.finish(), list, &mut ctx);
551595

596+
let mut lv_i32_i16 =
597+
ListViewBuilder::<i32, i16>::with_capacity(elem_dtype(), Nullable, 8, 4);
598+
list.append_to_builder(&mut lv_i32_i16, &mut ctx)?;
599+
assert_arrays_eq!(lv_i32_i16.finish(), list, &mut ctx);
600+
552601
let mut lv_u16_u16 =
553602
ListViewBuilder::<u16, u16>::with_capacity(elem_dtype(), Nullable, 8, 4);
554603
listview.append_to_builder(&mut lv_u16_u16, &mut ctx)?;
555604
assert_arrays_eq!(lv_u16_u16.finish(), list, &mut ctx);
556605

557-
// A `List`-encoded array appended into a `ListBuilder` with a non-`u64` offset type.
606+
// Both source encodings appended into `ListBuilder`s with non-`u64` (including signed)
607+
// offset types.
558608
let mut list_builder = ListBuilder::<u32>::with_capacity(elem_dtype(), Nullable, 8, 4);
559609
list.append_to_builder(&mut list_builder, &mut ctx)?;
560610
assert_arrays_eq!(list_builder.finish(), list, &mut ctx);
561611

612+
let mut list_builder_i16 = ListBuilder::<i16>::with_capacity(elem_dtype(), Nullable, 8, 4);
613+
listview.append_to_builder(&mut list_builder_i16, &mut ctx)?;
614+
assert_arrays_eq!(list_builder_i16.finish(), list, &mut ctx);
615+
562616
Ok(())
563617
}
564618

0 commit comments

Comments
 (0)