Skip to content

Commit 757d355

Browse files
fix: coerce rhs Arrow type to match lhs when comparing nested arrays
When arrow_compare_arrays falls back to make_comparator for nested types (struct, list, etc.), both sides were independently converted to Arrow with target=None, letting each array choose its own nearest physical type. A struct child backed by VarBinArray would produce Binary while a VarBinViewArray child would produce BinaryView. Both represent the same logical DType::Binary, but the assert_eq! on the data types panicked. Fix: convert lhs first (target=None) then use its Arrow DataType as the target Field when converting rhs. This mirrors the non-nested fast path which uses Datum::try_new_with_target_datatype for the same reason, and removes the now-redundant assert. Add a regression test that directly reproduces the fuzz crash pattern: a struct with a VarBinArray child on the left and a VarBinViewArray child on the right. Fixes #7957 Signed-off-by: Claude <claude@anthropic.com> Co-authored-by: Joe Isaacs <joseph-isaacs@users.noreply.github.com>
1 parent 2fa51e2 commit 757d355

1 file changed

Lines changed: 36 additions & 8 deletions

File tree

vortex-array/src/scalar_fn/fns/binary/compare.rs

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use arrow_array::BooleanArray;
77
use arrow_buffer::NullBuffer;
88
use arrow_ord::cmp;
99
use arrow_ord::ord::make_comparator;
10+
use arrow_schema::Field;
1011
use arrow_schema::SortOptions;
1112
use vortex_error::VortexResult;
1213
use vortex_error::vortex_err;
@@ -155,15 +156,14 @@ fn arrow_compare_arrays(
155156
// For nested types, fall back to `make_comparator` which does element-wise comparison.
156157
let arrow_array: BooleanArray = if left.dtype().is_nested() || right.dtype().is_nested() {
157158
let session = ctx.session().clone();
158-
let rhs = session.arrow().execute_arrow(right.clone(), None, ctx)?;
159+
// Convert lhs first to determine the target Arrow type, then convert rhs using that
160+
// same type as the target. This mirrors the non-nested path and ensures both sides
161+
// have identical Arrow types even when the two Vortex arrays use different underlying
162+
// encodings for logically equivalent fields (e.g., VarBinArray → Binary vs
163+
// VarBinViewArray → BinaryView for the same DType::Binary field inside a struct).
159164
let lhs = session.arrow().execute_arrow(left.clone(), None, ctx)?;
160-
161-
assert!(
162-
lhs.data_type().equals_datatype(rhs.data_type()),
163-
"lhs data_type: {}, rhs data_type: {}",
164-
lhs.data_type(),
165-
rhs.data_type()
166-
);
165+
let target_field = Field::new("", lhs.data_type().clone(), right.dtype().is_nullable());
166+
let rhs = session.arrow().execute_arrow(right.clone(), Some(&target_field), ctx)?;
167167

168168
compare_nested_arrow_arrays(lhs.as_ref(), rhs.as_ref(), operator)?
169169
} else {
@@ -509,6 +509,34 @@ mod tests {
509509
assert_arrays_eq!(result, expected);
510510
}
511511

512+
/// Regression test: comparing struct arrays where the same logical field is backed by
513+
/// different Vortex encodings (VarBinArray vs VarBinViewArray) must not panic.
514+
/// Previously `arrow_compare_arrays` would assert that both sides had identical Arrow
515+
/// data types, which failed when independent conversions chose different physical types
516+
/// (Binary vs BinaryView) for the same logical DType::Binary field.
517+
#[test]
518+
fn struct_compare_mixed_binary_encodings() {
519+
// LHS: struct with a VarBinArray (offset-based) binary field
520+
let bin_field1 =
521+
VarBinArray::from(vec!["apple".as_bytes(), "banana".as_bytes(), "cherry".as_bytes()]);
522+
let struct1 = StructArray::from_fields(&[("data", bin_field1.into_array())]).unwrap();
523+
524+
// RHS: struct with a VarBinViewArray (view-based) binary field — same logical DType
525+
let bin_field2 = VarBinViewArray::from_iter_bin([
526+
"apple".as_bytes(),
527+
"banana".as_bytes(),
528+
"durian".as_bytes(),
529+
]);
530+
let struct2 = StructArray::from_fields(&[("data", bin_field2.into_array())]).unwrap();
531+
532+
let result = struct1
533+
.into_array()
534+
.binary(struct2.into_array(), Operator::Eq)
535+
.unwrap();
536+
let expected = BoolArray::from_iter([true, true, false]);
537+
assert_arrays_eq!(result, expected);
538+
}
539+
512540
/// Regression test: `scalar_cmp` must error when comparing scalars with incompatible
513541
/// extension types (e.g., timestamps with different time units) rather than silently
514542
/// returning a wrong result.

0 commit comments

Comments
 (0)