diff --git a/doc/developer/design/20241204_wmr_type_casts.md b/doc/developer/design/20241204_wmr_type_casts.md new file mode 100644 index 0000000000000..bf016507f9b40 --- /dev/null +++ b/doc/developer/design/20241204_wmr_type_casts.md @@ -0,0 +1,152 @@ +# WMR CTE Type Casts + +## The Problem + +The code on `main` prevents users from declaring a WMR CTE with a `numeric` +column if the corresponding value returned from the query uses a different +scale, even though SQL trivially supports the operation. + +More generally, WMR's reliance on `ScalarType`'s `PartialEq` implementation +to determine type compatibility is more underpowered than it needs to be. + +## Success Criteria + +The following SQL logic test passes: + +``` +statement ok +CREATE TABLE y (a BIGINT); + +statement ok +INSERT INTO y VALUES (1); + +query T +WITH MUTUALLY RECURSIVE + bar(x NUMERIC) as (SELECT sum(a) FROM y) +SELECT x FROM bar +---- +1 + +query T +WITH MUTUALLY RECURSIVE + bar(x NUMERIC) as (SELECT sum(a) FROM y) +SELECT pg_typeof(x) FROM bar +---- +numeric + +query T +WITH MUTUALLY RECURSIVE + bar(x NUMERIC) as (SELECT sum(a) + 1.23456 FROM y) +SELECT x FROM bar +---- +2.23456 + +query T +WITH MUTUALLY RECURSIVE + bar(x NUMERIC(38,2)) as (SELECT sum(a) + 1.23456 FROM y) +SELECT x FROM bar +---- +2.23 +``` + +## Out of Scope + +Treating untyped string literals as "untyped string literals" rater than `TEXT` +values. This could be done at a later point in time. + +## Solution Proposal + +We can cast the derived `RelationExpr`'s output types to match those that the +user proposed in their statement using `CastContext::Assignment`. + +## Minimal Viable Prototype + +See the changed files bundled in this PR. + +## Alternatives + +### What kind of cast? + +In the most simplistic terms, we have options for how this coercion should +occur: + +- No coercion +- Implicit casts +- Assignment casts + +#### A quick note on casts + +The difference between cast contexts (implicit, assignment, explicit) is how +destructive the cast may be. Implicit casts are not permitted to introduce any +"destructive" behavior whatsoever, assignment casts allow truncation of +different varieties, and explicit casts allow the most radical transformations +(e.g. moving between type categories). + +#### Assignment casts (preferred) + +Because WMR statements require users to supply an explicit schema for the CTEs, +we should honor that schema with assignment casts. + +This means that if a user specifies that a column in a WMR CTE is of a type that +also contains a typmod (e.g. `numeric(38,2)`), we should impose this typmod on +all values returned from the CTE. + +At first I thought this made the CTE too much like a relation, but I validated +that transient relations do carry typmods into their output with the following +example in PG. + +```sql +CREATE FUNCTION inspect_type(IN TEXT, IN TEXT) + RETURNS TEXT + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT + AS $$SELECT + format_type(id, typmod) +FROM + ( + SELECT + atttypid AS id, atttypmod AS typmod + FROM + pg_attribute AS att + JOIN pg_class AS class ON att.attrelid = class.oid + WHERE + class.relname = $1 AND att.attname = $2 + ) + AS r;$$; + +CREATE TABLE x (a) AS SELECT 1.234::numeric(38,2)`; + +SELECT inexpect_type('x', 'a'); +---- +numeric(38,2) +``` + +#### Implicit casts + +Implicit casts are non-destructive and permissive. For example, implicitly +casting `numeric(38,10)` to `numeric(38,2)` (such as adding two values of these +types) will produce `numeric(38,10)`––there is no way of performing the +truncation expressed by the typmod to the higher-scaled value. Because of this, +I don't think the implicit context is appropriate to use when casting the +derived relation to the proposed relation. + +If this were the desired behavior, I would recommend we disallow typmods on the +column definitions for WMR CTEs. + +However, this then introduces wrinkles in dealing with custom types––we disallow +explicit typmods, but what about custom types? + +#### No coercion + +Another option here would be to perform no coercion but allow more types. For +example, we could disallow typmods in WMR CTE definitions, but allow any type +that passes the `ScalarType::base_eq` check. + +For example, if you declared the return type as `numeric`, it would let you +return any `numeric` type irrespective of its scale, e.g. `numeric(38,0)`, +`numeric(38,2)`. + +This might be the "easiest" to implement, but introduces a type of +casting/coercion behavior not used anywhere else. This seems unergonomic to +introduce and without clear benefit. diff --git a/src/repr/src/relation.rs b/src/repr/src/relation.rs index d18df7f372843..6049e67e250fa 100644 --- a/src/repr/src/relation.rs +++ b/src/repr/src/relation.rs @@ -214,36 +214,6 @@ impl RelationType { } } - /// True if any collection described by `self` could safely be described by `other`. - /// - /// In practice this means checking that the scalar types match exactly, and that the - /// nullability of `self` is at least as strict as `other`, and that all keys of `other` - /// contain some key of `self` (as a set of key columns is less strict than any subset). - pub fn subtypes(&self, other: &RelationType) -> bool { - let all_keys = other.keys.iter().all(|key1| { - self.keys - .iter() - .any(|key2| key1.iter().all(|k| key2.contains(k))) - }); - if !all_keys { - return false; - } - - if self.column_types.len() != other.column_types.len() { - return false; - } - - for (col1, col2) in self.column_types.iter().zip(other.column_types.iter()) { - if col1.nullable && !col2.nullable { - return false; - } - if col1.scalar_type != col2.scalar_type { - return false; - } - } - true - } - /// Returns all the [`ColumnType`]s, in order, for this relation. pub fn columns(&self) -> &[ColumnType] { &self.column_types diff --git a/src/sql/src/plan/error.rs b/src/sql/src/plan/error.rs index 22b7cbde0e959..5d9a8a290ef5a 100644 --- a/src/sql/src/plan/error.rs +++ b/src/sql/src/plan/error.rs @@ -368,6 +368,9 @@ impl PlanError { } } } + Self::RecursiveTypeMismatch(..) => { + Some("You will need to rewrite or cast the query's expressions.".into()) + }, _ => None, } } @@ -520,7 +523,7 @@ impl fmt::Display for PlanError { let declared = separated(", ", declared); let inferred = separated(", ", inferred); let name = name.quoted(); - write!(f, "declared type ({declared}) of WITH MUTUALLY RECURSIVE query {name} did not match inferred type ({inferred})") + write!(f, "WITH MUTUALLY RECURSIVE query {name} declared types ({declared}), but query returns types ({inferred})") }, Self::UnknownFunction {name, arg_types, ..} => { write!(f, "function {}({}) does not exist", name, arg_types.join(", ")) diff --git a/src/sql/src/plan/query.rs b/src/sql/src/plan/query.rs index dafba942ae5f5..35d25d365e3cc 100644 --- a/src/sql/src/plan/query.rs +++ b/src/sql/src/plan/query.rs @@ -1428,31 +1428,63 @@ pub fn plan_ctes( // Plan all CTEs and validate the proposed types. for cte in ctes.iter() { - let cte_name = normalize::ident(cte.name.clone()); let (val, _scope) = plan_nested_query(qcx, &cte.query)?; + + let proposed_typ = qcx.ctes[&cte.id].desc.typ(); + + if proposed_typ.column_types.iter().any(|c| !c.nullable) { + // Once WMR CTEs support NOT NULL constraints, check that + // nullability of derived column types are compatible. + sql_bail!("[internal error]: WMR CTEs do not support NOT NULL constraints on proposed column types"); + } + + if !proposed_typ.keys.is_empty() { + // Once WMR CTEs support keys, check that keys exactly + // overlap. + sql_bail!("[internal error]: WMR CTEs do not support keys"); + } + // Validate that the derived and proposed types are the same. - let typ = qcx.relation_type(&val); - // TODO: Use implicit casts to convert among types rather than error. - if !typ.subtypes(qcx.ctes[&cte.id].desc.typ()) { - let declared_typ = qcx.ctes[&cte.id] - .desc - .typ() + let derived_typ = qcx.relation_type(&val); + + let type_err = |proposed_typ: &RelationType, derived_typ: RelationType| { + let cte_name = normalize::ident(cte.name.clone()); + let proposed_typ = proposed_typ .column_types .iter() .map(|ty| qcx.humanize_scalar_type(&ty.scalar_type)) .collect::>(); - let inferred_typ = typ + let inferred_typ = derived_typ .column_types .iter() .map(|ty| qcx.humanize_scalar_type(&ty.scalar_type)) .collect::>(); Err(PlanError::RecursiveTypeMismatch( cte_name, - declared_typ, + proposed_typ, inferred_typ, - ))?; + )) + }; + + if derived_typ.column_types.len() != proposed_typ.column_types.len() { + return type_err(proposed_typ, derived_typ); } + // Cast dervied types to proposed types or error. + let val = match cast_relation( + qcx, + // Choose `CastContext::Assignment`` because the user has + // been explicit about the types they expect. Choosing + // `CastContext::Implicit` is not "strong" enough to impose + // typmods from proposed types onto values. + CastContext::Assignment, + val, + proposed_typ.column_types.iter().map(|c| &c.scalar_type), + ) { + Ok(val) => val, + Err(_) => return type_err(proposed_typ, derived_typ), + }; + result.push((cte.id, val, shadowed_descs.remove(&cte.id))); } } diff --git a/test/sqllogictest/with_mutually_recursive.slt b/test/sqllogictest/with_mutually_recursive.slt index 3c3cc88e644ae..24a37afc22068 100644 --- a/test/sqllogictest/with_mutually_recursive.slt +++ b/test/sqllogictest/with_mutually_recursive.slt @@ -158,21 +158,21 @@ SELECT (SELECT COUNT(*) FROM foo) FROM bar; ## Test error cases ## Test a recursive query with mismatched types. -statement error did not match inferred type +statement error db error: ERROR: WITH MUTUALLY RECURSIVE query "bar" declared types \(integer\), but query returns types \(text\) WITH MUTUALLY RECURSIVE foo (a text, b int) AS (SELECT 1, 2 UNION SELECT a, 7 FROM bar), bar (a int) as (SELECT a FROM foo) SELECT * FROM bar; ## Test with fewer columns than declared -statement error did not match inferred type +statement error db error: ERROR: WITH MUTUALLY RECURSIVE query "foo" declared types \(integer, integer\), but query returns types \(integer\) WITH MUTUALLY RECURSIVE foo (a int, b int) AS (SELECT 1 UNION SELECT a FROM bar), bar (a int) as (SELECT a FROM foo) SELECT a FROM foo, bar; ## Test with more columns than declared -statement error did not match inferred type +statement error db error: ERROR: WITH MUTUALLY RECURSIVE query "foo" declared types \(integer, integer\), but query returns types \(integer, integer, integer\) WITH MUTUALLY RECURSIVE foo (a int, b int) AS (SELECT 1, 2, 3 UNION SELECT a, 5, 6 FROM bar), bar (a int) as (SELECT a FROM foo) @@ -211,14 +211,6 @@ WITH MUTUALLY RECURSIVE bar (a int) as (SELECT a FROM foo) SELECT a FROM foo, bar; -## Test incompatible declared and inferred types -statement error db error: ERROR: declared type \(integer, text\) of WITH MUTUALLY RECURSIVE query "forever" did not match inferred type \(bigint, text\) -WITH MUTUALLY RECURSIVE - forever (i int, y text) as ( - SELECT COUNT(*), 'oops' FROM mz_views UNION (SELECT i + 1, 'oops' FROM forever) - ) -SELECT * FROM forever; - # Tests for nested WITH MUTUALLY RECURSIVE statement ok diff --git a/test/sqllogictest/with_mututally_recursive_types.slt b/test/sqllogictest/with_mututally_recursive_types.slt new file mode 100644 index 0000000000000..2c034c309edf6 --- /dev/null +++ b/test/sqllogictest/with_mututally_recursive_types.slt @@ -0,0 +1,75 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. + +statement ok +CREATE TABLE y (a BIGINT); + +statement ok +INSERT INTO y VALUES (1); + +query T +WITH MUTUALLY RECURSIVE + bar(x NUMERIC) as (SELECT sum(a) FROM y) +SELECT x FROM bar +---- +1 + +query T +WITH MUTUALLY RECURSIVE + bar(x NUMERIC) as (SELECT sum(a) FROM y) +SELECT pg_typeof(x) FROM bar +---- +numeric + +query T +WITH MUTUALLY RECURSIVE + bar(x NUMERIC) as (SELECT sum(a) + 1.23456 FROM y) +SELECT x FROM bar +---- +2.23456 + +query T +WITH MUTUALLY RECURSIVE + bar(x NUMERIC(38,2)) as (SELECT sum(a) + 1.23456 FROM y) +SELECT x FROM bar +---- +2.23 + +query T +WITH MUTUALLY RECURSIVE + bar(x UINT2) as (SELECT 1::INT8) +SELECT x FROM bar +---- +1 + +query error "-1" uint2 out of range +WITH MUTUALLY RECURSIVE + bar(x UINT2) as (SELECT -1::INT8) +SELECT x FROM bar + +# TODO: '1' should be coercible to an integer. +query error db error: ERROR: WITH MUTUALLY RECURSIVE query "bar" declared types \(bigint\), but query returns types \(text\) +WITH MUTUALLY RECURSIVE + bar(x INT8) as (SELECT '1') +SELECT x FROM bar + +statement ok +CREATE TYPE list_numeric_scale_2 AS LIST (ELEMENT TYPE = NUMERIC(38,2)); + +query T +WITH MUTUALLY RECURSIVE + bar(x list_numeric_scale_2) as (SELECT LIST[sum(a) + 1.2345] FROM y) +SELECT x::TEXT FROM bar +---- +{2.23} + +query error db error: ERROR: WITH MUTUALLY RECURSIVE query "bar" declared types \(list_numeric_scale_2\), but query returns types \(text list\) +WITH MUTUALLY RECURSIVE + bar(x list_numeric_scale_2) as (SELECT LIST['1'::TEXT]) +SELECT x FROM bar