Skip to content

Commit f091924

Browse files
committed
initial
Signed-off-by: Mikhail Kot <mikhail@spiraldb.com>
1 parent 2a5c895 commit f091924

7 files changed

Lines changed: 226 additions & 136 deletions

File tree

vortex-duckdb/cpp/include/duckdb_vx/vector.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ DUCKDB_INCLUDES_END
1717
extern "C" {
1818
#endif
1919

20+
// Create a vector that slices another vector between a pair of offsets [offset, end];
21+
duckdb_vector duckdb_vx_vector_slice(duckdb_vector ffi_vector, idx_t offset, idx_t end);
22+
2023
/// Slice the vector to a new dictionary vector, using the current vector's values and
2124
/// the provided selection vector.
2225
///

vortex-duckdb/cpp/vector.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@ DUCKDB_INCLUDES_END
1515

1616
using namespace duckdb;
1717

18+
extern "C" duckdb_vector duckdb_vx_vector_slice(duckdb_vector ffi_vector, idx_t offset, idx_t end) {
19+
const Vector &vector = *reinterpret_cast<Vector *>(ffi_vector);
20+
Vector *const sliced = new Vector(vector, offset, end);
21+
return reinterpret_cast<duckdb_vector>(sliced);
22+
}
23+
1824
extern "C" void duckdb_vx_vector_slice_to_dictionary(duckdb_vector ffi_vector,
1925
duckdb_selection_vector ffi_sel_vec,
2026
idx_t selection_vector_length) {

vortex-duckdb/src/convert/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ mod scalar;
77
mod table_filter;
88
mod vector;
99

10+
#[cfg(test)]
11+
pub use dtype::FromLogicalType;
1012
pub use dtype::from_duckdb_table;
1113
pub use expr::try_from_bound_expression;
1214
pub use scalar::*;

vortex-duckdb/src/duckdb/vector.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ impl Vector {
6262
pub fn with_capacity(logical_type: &LogicalTypeRef, len: usize) -> Self {
6363
unsafe { Self::own(cpp::duckdb_create_vector(logical_type.as_ptr(), len as _)) }
6464
}
65+
66+
pub fn slice(other: &VectorRef, offset: u64, end: u64) -> Self {
67+
unsafe { Self::own(cpp::duckdb_vx_vector_slice(other.as_ptr(), offset, end)) }
68+
}
6569
}
6670

6771
impl VectorRef {
@@ -307,6 +311,10 @@ impl VectorRef {
307311
unsafe { Vector::borrow_mut(cpp::duckdb_array_vector_get_child(self.as_ptr())) }
308312
}
309313

314+
pub fn list_vector_get_size(&self) -> u64 {
315+
unsafe { cpp::duckdb_list_vector_get_size(self.as_ptr()) }
316+
}
317+
310318
pub fn list_vector_set_size(&self, size: u64) -> VortexResult<()> {
311319
let state = unsafe { cpp::duckdb_list_vector_set_size(self.as_ptr(), size) };
312320
match state {

vortex-duckdb/src/e2e_test/vortex_scan_test.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -982,8 +982,10 @@ fn test_vortex_encodings_roundtrip() {
982982
assert_eq!(list_entries[4].offset, 10);
983983

984984
// Get child vector and verify actual values
985+
let list_child_len = list_vec.list_vector_get_size();
986+
assert_eq!(list_child_len, 10);
985987
let list_child = list_vec.list_vector_get_child();
986-
let child_values = list_child.as_slice_with_len::<i32>(10); // 10 total child elements
988+
let child_values = list_child.as_slice_with_len::<i32>(list_child_len.as_());
987989
assert_eq!(child_values, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
988990

989991
// Verify fixed-size list column (column 9)

vortex-duckdb/src/exporter/list.rs

Lines changed: 122 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ use vortex::array::arrays::list::ListDataParts;
1212
use vortex::array::match_each_integer_ptype;
1313
use vortex::array::validity::Validity;
1414
use vortex::dtype::IntegerPType;
15+
use vortex::error::VortexExpect;
1516
use vortex::error::VortexResult;
16-
use vortex::error::vortex_err;
1717
use vortex::mask::Mask;
1818

1919
use super::ConversionCache;
@@ -110,40 +110,49 @@ impl<O: IntegerPType> ColumnExporter for ListExporter<O> {
110110
vector: &mut VectorRef,
111111
_ctx: &mut ExecutionCtx,
112112
) -> VortexResult<()> {
113+
if len == 0 {
114+
return vector.list_vector_set_size(0);
115+
}
116+
113117
// SAFETY: TODO(connor): Pretty sure that `export` needs to be `unsafe`.
114118
let duckdb_list_views: &mut [cpp::duckdb_list_entry] =
115119
unsafe { vector.as_slice_mut::<cpp::duckdb_list_entry>(len) };
116120
debug_assert_eq!(duckdb_list_views.len(), len);
117121

118-
if len > 0 {
119-
let offsets = &self.offsets.as_slice::<O>()[offset..offset + len + 1];
120-
debug_assert_eq!(offsets.len(), len + 1);
122+
let offsets = &self.offsets.as_slice::<O>()[offset..offset + len + 1];
123+
assert_eq!(offsets.len(), len + 1);
121124

122-
for i in 0..len {
123-
let offset = offsets[i]
124-
.to_u64()
125-
.ok_or_else(|| vortex_err!("somehow unable to convert an offset to u64"))?;
126-
let length = (offsets[i + 1] - offsets[i])
127-
.to_u64()
128-
.ok_or_else(|| vortex_err!("somehow unable to convert an offset to u64"))?;
125+
let offset_start = offsets[0].to_u64().vortex_expect("conversion failed");
126+
let offset_end = offsets[len].to_u64().vortex_expect("conversion failed");
127+
assert!(offset_end <= self.num_elements as u64);
129128

130-
debug_assert!(offset + length <= self.num_elements as u64);
131-
132-
duckdb_list_views[i] = cpp::duckdb_list_entry { offset, length };
133-
}
129+
for i in 0..len {
130+
let offset = offsets[i].to_u64().vortex_expect("conversion failed");
131+
let length = (offsets[i + 1] - offsets[i])
132+
.to_u64()
133+
.vortex_expect("conversion failed");
134+
let offset = offset - offset_start;
135+
duckdb_list_views[i] = cpp::duckdb_list_entry { offset, length };
134136
}
135137

136-
let child = vector.list_vector_get_child_mut();
137-
child.reference(&self.duckdb_elements.lock());
138+
let sliced = {
139+
let elements = &self.duckdb_elements.lock();
140+
Vector::slice(elements, offset_start, offset_end)
141+
};
138142

139-
vector.list_vector_set_size(self.num_elements as u64)?;
143+
let child_len = offset_end - offset_start;
144+
// runend is the case where len < child_len, see test_export_runend_list
145+
assert!(len as u64 <= child_len);
140146

147+
vector.list_vector_get_child_mut().reference(&sliced);
148+
vector.list_vector_set_size(child_len)?;
141149
Ok(())
142150
}
143151
}
144152

145153
#[cfg(test)]
146154
mod tests {
155+
use num_traits::AsPrimitive;
147156
use vortex::array::IntoArray as _;
148157
use vortex::array::VortexSessionExecute;
149158
use vortex::array::arrays::VarBinArray;
@@ -153,34 +162,34 @@ mod tests {
153162
use vortex::dtype::DType;
154163
use vortex::dtype::PType;
155164
use vortex::encodings::runend::RunEnd;
156-
use vortex::error::VortexExpect;
157165
use vortex::error::VortexResult;
158166

159167
use super::*;
160168
use crate::SESSION;
169+
use crate::convert::FromLogicalType;
161170
use crate::duckdb::DataChunk;
162171
use crate::duckdb::LogicalType;
163172
use crate::exporter::new_array_exporter;
164173

165174
#[test]
166-
fn test_export_empty_list() {
175+
fn test_export_empty_list() -> VortexResult<()> {
167176
let list = ListArray::try_new(
168177
Buffer::<u32>::empty().into_array(),
169178
buffer![0u32].into_array(),
170179
Validity::AllValid,
171-
)
172-
.vortex_expect("list creation should succeed")
180+
)?
173181
.into_array();
174182

175-
let list_type = LogicalType::list_type(LogicalType::uint32())
176-
.vortex_expect("LogicalTypeRef creation should succeed for test data");
183+
let list_type = LogicalType::list_type(LogicalType::uint32())?;
177184
let mut chunk = DataChunk::new([list_type]);
178185

179186
let mut ctx = SESSION.create_execution_ctx();
180-
new_array_exporter(list, &ConversionCache::default(), &mut ctx)
181-
.unwrap()
182-
.export(0, 0, chunk.get_vector_mut(0), &mut ctx)
183-
.unwrap();
187+
new_array_exporter(list, &ConversionCache::default(), &mut ctx)?.export(
188+
0,
189+
0,
190+
chunk.get_vector_mut(0),
191+
&mut ctx,
192+
)?;
184193
chunk.set_len(0);
185194

186195
assert_eq!(
@@ -189,16 +198,16 @@ mod tests {
189198
- FLAT UINTEGER[]: 0 = [ ]
190199
"#
191200
);
201+
Ok(())
192202
}
193203

194204
#[test]
195-
fn test_export_u64_list() {
205+
fn test_export_u64_list() -> VortexResult<()> {
196206
let list = ListArray::try_new(
197207
buffer![1u64, 2, 3, 4, 5].into_array(),
198208
buffer![0u8, 1, 2, 3, 4, 5].into_array(),
199209
Validity::AllValid,
200-
)
201-
.vortex_expect("list creation should succeed")
210+
)?
202211
.into_array();
203212
assert_eq!(
204213
list.dtype(),
@@ -208,15 +217,16 @@ mod tests {
208217
)
209218
);
210219

211-
let list_type = LogicalType::list_type(LogicalType::uint64())
212-
.vortex_expect("LogicalTypeRef creation should succeed for test data");
220+
let list_type = LogicalType::list_type(LogicalType::uint64())?;
213221
let mut chunk = DataChunk::new([list_type]);
214222

215223
let mut ctx = SESSION.create_execution_ctx();
216-
new_array_exporter(list, &ConversionCache::default(), &mut ctx)
217-
.unwrap()
218-
.export(0, 5, chunk.get_vector_mut(0), &mut ctx)
219-
.unwrap();
224+
new_array_exporter(list, &ConversionCache::default(), &mut ctx)?.export(
225+
0,
226+
5,
227+
chunk.get_vector_mut(0),
228+
&mut ctx,
229+
)?;
220230
chunk.set_len(5);
221231

222232
assert_eq!(
@@ -225,24 +235,75 @@ mod tests {
225235
- FLAT UBIGINT[]: 5 = [ [1], [2], [3], [4], [5]]
226236
"#
227237
);
238+
Ok(())
228239
}
229240

230-
// Ensure runend-compressed list is properly flattened
231241
#[test]
232-
fn test_export_list_with_runend_elements() -> VortexResult<()> {
242+
fn test_export_u64_list_two_pass() -> VortexResult<()> {
243+
// [1], [2, 8], [3], [4], [5]
244+
let elements = buffer![1u64, 2, 8, 3, 4, 5].into_array();
245+
let offsets = buffer![0u8, 1, 3, 4, 5, 6].into_array();
246+
let list = ListArray::try_new(elements, offsets, Validity::AllValid)?.into_array();
247+
248+
let u64_type = LogicalType::uint64();
249+
let list_type = LogicalType::list_type(u64_type)?;
250+
let mut chunk = DataChunk::new([list_type]);
251+
233252
let mut ctx = SESSION.create_execution_ctx();
234-
let elements = RunEnd::encode(buffer![100u32, 100, 200, 200, 200].into_array(), &mut ctx)?;
253+
let exporter = new_array_exporter(list, &ConversionCache::default(), &mut ctx)?;
235254

236-
let list = ListArray::try_new(
237-
elements.into_array(),
238-
buffer![0u32, 2, 5].into_array(),
239-
Validity::AllValid,
240-
)
241-
.vortex_expect("list creation should succeed")
242-
.into_array();
255+
exporter.export(0, 2, chunk.get_vector_mut(0), &mut ctx)?;
256+
chunk.set_len(2);
257+
258+
assert_eq!(
259+
format!("{}", String::try_from(&*chunk).unwrap()),
260+
r#"Chunk - [1 Columns]
261+
- FLAT UBIGINT[]: 2 = [ [1], [2, 8]]
262+
"#
263+
);
264+
265+
let u64_type = LogicalType::uint64();
266+
let list_vec = chunk.get_vector(0);
267+
let list_child = list_vec.list_vector_get_child();
268+
assert_eq!(
269+
DType::from_logical_type(&list_child.logical_type(), false.into())?,
270+
DType::from_logical_type(&u64_type, false.into())?
271+
);
272+
let child_len: usize = list_vec.list_vector_get_size().as_();
273+
assert_eq!(child_len, 3);
274+
let child_values = list_child.as_slice_with_len::<u64>(child_len);
275+
assert_eq!(child_values, [1, 2, 8]);
276+
277+
exporter.export(2, 3, chunk.get_vector_mut(0), &mut ctx)?;
278+
chunk.set_len(3);
279+
280+
assert_eq!(
281+
format!("{}", String::try_from(&*chunk).unwrap()),
282+
r#"Chunk - [1 Columns]
283+
- FLAT UBIGINT[]: 3 = [ [3], [4], [5]]
284+
"#
285+
);
286+
287+
let list_vec = chunk.get_vector(0);
288+
let child_len: usize = list_vec.list_vector_get_size().as_();
289+
assert_eq!(child_len, 3);
290+
let list_child = list_vec.list_vector_get_child();
291+
let child_values = list_child.as_slice_with_len::<u64>(child_len);
292+
assert_eq!(child_values, [3, 4, 5]);
243293

244-
let list_type = LogicalType::list_type(LogicalType::uint32())
245-
.vortex_expect("LogicalTypeRef creation should succeed for test data");
294+
Ok(())
295+
}
296+
297+
#[test]
298+
fn test_export_runend_list() -> VortexResult<()> {
299+
let mut ctx = SESSION.create_execution_ctx();
300+
let elements_buffer = buffer![100f32, 100f32, 200f32, 200f32, 200f32].into_array();
301+
let elements = RunEnd::encode(elements_buffer, &mut ctx)?.into_array();
302+
303+
let list_buffer = buffer![0u32, 2, 5].into_array();
304+
let list = ListArray::try_new(elements, list_buffer, Validity::AllValid)?.into_array();
305+
306+
let list_type = LogicalType::list_type(LogicalType::float32())?;
246307
let mut chunk = DataChunk::new([list_type]);
247308

248309
new_array_exporter(list, &ConversionCache::default(), &mut ctx)?.export(
@@ -256,15 +317,15 @@ mod tests {
256317
assert_eq!(
257318
format!("{}", String::try_from(&*chunk)?),
258319
r#"Chunk - [1 Columns]
259-
- FLAT UINTEGER[]: 2 = [ [100, 100], [200, 200, 200]]
320+
- FLAT FLOAT[]: 2 = [ [100.0, 100.0], [200.0, 200.0, 200.0]]
260321
"#
261322
);
262323

263324
Ok(())
264325
}
265326

266327
#[test]
267-
fn test_export_non_empty_list_of_strings() {
328+
fn test_export_string_list() -> VortexResult<()> {
268329
let list = ListArray::try_new(
269330
<VarBinArray as FromIterator<_>>::from_iter([
270331
Some("abc"),
@@ -275,19 +336,19 @@ mod tests {
275336
.into_array(),
276337
buffer![0u8, 1, 2, 3, 4].into_array(),
277338
Validity::from_iter([true, true, false, true]),
278-
)
279-
.vortex_expect("list creation should succeed")
339+
)?
280340
.into_array();
281341

282-
let list_type = LogicalType::list_type(LogicalType::varchar())
283-
.vortex_expect("LogicalTypeRef creation should succeed for test data");
342+
let list_type = LogicalType::list_type(LogicalType::varchar())?;
284343
let mut chunk = DataChunk::new([list_type]);
285344

286345
let mut ctx = SESSION.create_execution_ctx();
287-
new_array_exporter(list, &ConversionCache::default(), &mut ctx)
288-
.unwrap()
289-
.export(0, 4, chunk.get_vector_mut(0), &mut ctx)
290-
.unwrap();
346+
new_array_exporter(list, &ConversionCache::default(), &mut ctx)?.export(
347+
0,
348+
4,
349+
chunk.get_vector_mut(0),
350+
&mut ctx,
351+
)?;
291352
chunk.set_len(4);
292353

293354
assert_eq!(
@@ -296,5 +357,7 @@ mod tests {
296357
- FLAT VARCHAR[]: 4 = [ [abc], [def], NULL, [ghi]]
297358
"#
298359
);
360+
361+
Ok(())
299362
}
300363
}

0 commit comments

Comments
 (0)