Skip to content

Commit ab8cd8c

Browse files
authored
fix: NaN semantics in GROUP BY (apache#16256)
1 parent 448c985 commit ab8cd8c

2 files changed

Lines changed: 11 additions & 1 deletion

File tree

datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
 
 use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn};
 use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder;
+use arrow::array::ArrowNativeTypeOp;
 use arrow::array::{cast::AsArray, Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray};
 use arrow::buffer::ScalarBuffer;
 use arrow::datatypes::DataType;
@@ -121,7 +122,7 @@ impl<T: ArrowPrimitiveType, const NULLABLE: bool> GroupColumn
 // Otherwise, we need to check their values
 }
 
-*equal_to_result = self.group_values[lhs_row] == array.value(rhs_row);
+*equal_to_result = self.group_values[lhs_row].is_eq(array.value(rhs_row));
 }
 }
 

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7018,3 +7018,12 @@ VALUES
 );
 ----
 {a: 1, b: 2, c: 3} {a: 1, b: 2, c: 4}
+
+query TI
+SELECT column1, COUNT(DISTINCT column2) FROM (
+VALUES
+('x', arrow_cast('NAN','Float64')),
+('x', arrow_cast('NAN','Float64'))
+) GROUP BY 1 ORDER BY 1;
+----
+x 1

0 commit comments

Comments
 (0)