From 6e6079043357a6c2f67606e19b465a6ebcd0262a Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Tue, 8 Jul 2025 10:25:12 +0200 Subject: [PATCH 01/17] feat(istmus): add ddl support to sql->substrait # Conflicts: # isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java # Conflicts: # isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java --- .../io/substrait/isthmus/SqlToSubstrait.java | 77 ++++++++++- .../isthmus/expression/DdlRelBuilder.java | 123 ++++++++++++++++++ .../substrait/isthmus/DdlRoundtripTest.java | 52 ++++++++ 3 files changed, 251 insertions(+), 1 deletion(-) create mode 100644 isthmus/src/main/java/io/substrait/isthmus/expression/DdlRelBuilder.java create mode 100644 isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index 3e19ca58c..1ad0effae 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -1,12 +1,25 @@ package io.substrait.isthmus; +import com.google.common.annotations.VisibleForTesting; import io.substrait.isthmus.sql.SubstraitSqlToCalcite; +import io.substrait.isthmus.sql.SubstraitSqlValidator; import io.substrait.plan.ImmutablePlan.Builder; import io.substrait.plan.Plan; import io.substrait.plan.Plan.Version; import io.substrait.plan.PlanProtoConverter; +import java.util.List; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; import org.apache.calcite.prepare.Prepare; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.rules.CoreRules; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; import org.apache.calcite.sql.parser.SqlParseException; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.validate.SqlValidator; +import org.apache.calcite.sql2rel.SqlToRelConverter; +import org.apache.calcite.sql2rel.StandardConvertletTable; /** Take a SQL statement and a set of table definitions and return a substrait plan. */ public class SqlToSubstrait extends SqlConverterBase { @@ -53,10 +66,72 @@ public Plan convert(String sqlStatements, Prepare.CatalogReader catalogReader) builder.version(Version.builder().from(Version.DEFAULT_VERSION).producer("isthmus").build()); // TODO: consider case in which one sql passes conversion while others don't - SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader).stream() + sqlToRelNode(sqlStatements, catalogReader).stream() .map(root -> SubstraitRelVisitor.convert(root, EXTENSION_COLLECTION, featureBoard)) .forEach(root -> builder.addRoots(root)); return builder.build(); } + + /** + * Converts one or more SQL statements into a Substrait {@link Plan}. + * + * @param sqlStatements a string containing one more SQL statements + * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in + * the SQL statements + * @return the Substrait {@link Plan} + * @throws SqlParseException if there is an error while parsing the SQL statements + */ + public Plan convertNew(String sqlStatements, Prepare.CatalogReader catalogReader) + throws SqlParseException { + Builder builder = io.substrait.plan.Plan.builder(); + builder.version(Version.builder().from(Version.DEFAULT_VERSION).producer("isthmus").build()); + + // TODO: consider case in which one sql passes conversion while others don't + SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader).stream() + .map(root -> SubstraitRelVisitor.convert(root, EXTENSION_COLLECTION, featureBoard)) + .forEach(root -> builder.addRoots(root)); + + return builder.build(); + } + + @VisibleForTesting + List sqlToRelNode(String sql, Prepare.CatalogReader catalogReader) + throws SqlParseException { + SqlValidator validator = new SubstraitSqlValidator(catalogReader); + SqlParser parser = SqlParser.create(sql, parserConfig); + SqlNodeList parsedList = parser.parseStmtList(); + SqlToRelConverter converter = createSqlToRelConverter(validator, catalogReader); + List roots = + parsedList.stream() + .map(parsed -> getBestExpRelRoot(converter, parsed)) + .collect(java.util.stream.Collectors.toList()); + return roots; + } + + protected SqlToRelConverter createSqlToRelConverter( + SqlValidator validator, Prepare.CatalogReader catalogReader) { + SqlToRelConverter converter = + new SqlToRelConverter( + null, + validator, + catalogReader, + relOptCluster, + StandardConvertletTable.INSTANCE, + converterConfig); + return converter; + } + + protected RelRoot getBestExpRelRoot(SqlToRelConverter converter, SqlNode parsed) { + RelRoot root = converter.convertQuery(parsed, true, true); + { + // RelBuilder seems to implicitly use the rule below, + // need to add to avoid discrepancies in assertFullRoundTrip + HepProgram program = HepProgram.builder().addRuleInstance(CoreRules.PROJECT_REMOVE).build(); + HepPlanner hepPlanner = new HepPlanner(program); + hepPlanner.setRoot(root.rel); + root = root.withRel(hepPlanner.findBestExp()); + } + return root; + } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/DdlRelBuilder.java b/isthmus/src/main/java/io/substrait/isthmus/expression/DdlRelBuilder.java new file mode 100644 index 000000000..ddff9e173 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/DdlRelBuilder.java @@ -0,0 +1,123 @@ +package io.substrait.isthmus.expression; + +import io.substrait.expression.Expression; +import io.substrait.expression.ExpressionCreator; +import io.substrait.extension.SimpleExtension; +import io.substrait.isthmus.FeatureBoard; +import io.substrait.isthmus.SubstraitRelVisitor; +import io.substrait.isthmus.TypeConverter; +import io.substrait.plan.Plan; +import io.substrait.relation.AbstractDdlRel; +import io.substrait.relation.AbstractWriteRel; +import io.substrait.relation.NamedDdl; +import io.substrait.relation.NamedWrite; +import io.substrait.type.NamedStruct; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.BiFunction; +import java.util.function.Function; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.ddl.SqlCreateTable; +import org.apache.calcite.sql.ddl.SqlCreateView; +import org.apache.calcite.sql.util.SqlBasicVisitor; +import org.apache.calcite.sql2rel.SqlToRelConverter; + +public class DdlRelBuilder extends SqlBasicVisitor { + protected final Map, Function> createHandlers = + new ConcurrentHashMap<>(); + + private final SqlToRelConverter converter; + private final BiFunction bestExpRelRootGetter; + private final SimpleExtension.ExtensionCollection extensionCollection; + private final FeatureBoard featureBoard; + + public DdlRelBuilder( + final SqlToRelConverter converter, + final BiFunction bestExpRelRootGetter, + final SimpleExtension.ExtensionCollection extensionCollection, + final FeatureBoard featureBoard) { + super(); + this.converter = converter; + this.bestExpRelRootGetter = bestExpRelRootGetter; + this.extensionCollection = extensionCollection; + this.featureBoard = featureBoard; + + createHandlers.put( + SqlCreateTable.class, sqlCall -> handleCreateTable((SqlCreateTable) sqlCall)); + createHandlers.put(SqlCreateView.class, sqlCall -> handleCreateView((SqlCreateView) sqlCall)); + } + + private Function findCreateHandler(final SqlCall call) { + Class currentClass = call.getClass(); + while (SqlCall.class.isAssignableFrom(currentClass)) { + final Function found = createHandlers.get(currentClass); + if (found != null) { + return found; + } + currentClass = currentClass.getSuperclass(); + } + return null; + } + + @Override + public Plan.Root visit(final SqlCall sqlCall) { + Function createHandler = findCreateHandler(sqlCall); + if (createHandler == null) { + return null; + } + + return createHandler.apply(sqlCall); + } + + private NamedStruct getSchema(final RelRoot queryRelRoot) { + final RelDataType rowType = queryRelRoot.rel.getRowType(); + + final TypeConverter typeConverter = TypeConverter.DEFAULT; + return typeConverter.toNamedStruct(rowType); + } + + protected Plan.Root handleCreateTable(final SqlCreateTable sqlCreateTable) { + if (sqlCreateTable.query == null) { + throw new IllegalArgumentException("Only create table as select statements are supported"); + } + + final RelRoot queryRelRoot = bestExpRelRootGetter.apply(converter, sqlCreateTable.query); + + NamedStruct schema = getSchema(queryRelRoot); + + Plan.Root rel = SubstraitRelVisitor.convert(queryRelRoot, extensionCollection, featureBoard); + NamedWrite namedWrite = + NamedWrite.builder() + .input(rel.getInput()) + .tableSchema(schema) + .operation(AbstractWriteRel.WriteOp.CTAS) + .createMode(AbstractWriteRel.CreateMode.REPLACE_IF_EXISTS) + .outputMode(AbstractWriteRel.OutputMode.NO_OUTPUT) + .names(sqlCreateTable.name.names) + .build(); + + return Plan.Root.builder().input(namedWrite).build(); + } + + protected Plan.Root handleCreateView(final SqlCreateView sqlCreateView) { + + final RelRoot queryRelRoot = bestExpRelRootGetter.apply(converter, sqlCreateView.query); + Plan.Root rel = SubstraitRelVisitor.convert(queryRelRoot, extensionCollection, featureBoard); + final Expression.StructLiteral defaults = ExpressionCreator.struct(false); + + final NamedDdl namedDdl = + NamedDdl.builder() + .viewDefinition(rel.getInput()) + .tableSchema(getSchema(queryRelRoot)) + .tableDefaults(defaults) + .operation(AbstractDdlRel.DdlOp.CREATE) + .object(AbstractDdlRel.DdlObject.VIEW) + .names(sqlCreateView.name.names) + .build(); + + return Plan.Root.builder().input(namedDdl).build(); + } +} diff --git a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java new file mode 100644 index 000000000..579fc938d --- /dev/null +++ b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java @@ -0,0 +1,52 @@ +package io.substrait.isthmus; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.substrait.isthmus.sql.SubstraitCreateStatementParser; +import io.substrait.plan.Plan; +import io.substrait.plan.PlanProtoConverter; +import io.substrait.plan.ProtoPlanConverter; +import org.apache.calcite.prepare.Prepare; +import org.apache.calcite.sql.parser.SqlParseException; +import org.junit.jupiter.api.Test; + +class DdlRoundtripTest extends PlanTestBase { + final Prepare.CatalogReader catalogReader = + SubstraitCreateStatementParser.processCreateStatementsToCatalog( + "create table src1 (intcol int, charcol varchar(10))", + "create table src2 (intcol int, charcol varchar(10))"); + + public DdlRoundtripTest() throws SqlParseException { + super(); + } + + void testSqlToSubstrait(String sqlStatement) throws SqlParseException { + SqlToSubstrait sqlToSubstrait = new SqlToSubstrait(); + io.substrait.proto.Plan protoPlan = sqlToSubstrait.execute(sqlStatement, catalogReader); + Plan plan = new ProtoPlanConverter().from(protoPlan); + io.substrait.proto.Plan protoPlan1 = new PlanProtoConverter().toProto(plan); + assertEquals(protoPlan, protoPlan1); + } + + void testPlanRoundTrip(String sqlStatement) throws SqlParseException { + SqlToSubstrait sql2subst = new SqlToSubstrait(); + final Plan plan = sql2subst.convert(sqlStatement, catalogReader); + + assertPlanRoundtrip(plan); + } + + @Test + void testCreateTable() throws SqlParseException { + String sql = "create table dst1 as select * from src1"; + testSqlToSubstrait(sql); + // TBD: full roundtrip is not possible because there is no relational algebra for DDL + testPlanRoundTrip(sql); + } + + @Test + void testCreateView() throws SqlParseException { + String sql = "create view dst1 as select * from src1"; + testSqlToSubstrait(sql); + testPlanRoundTrip(sql); + } +} From a70e0c33823034b6f76f7a88795bb2bf765ec6d3 Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Tue, 12 Aug 2025 15:23:57 +0200 Subject: [PATCH 02/17] chore(isthmus): refactor sqlToSubstrait --- .../io/substrait/isthmus/SqlToSubstrait.java | 48 ++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index 1ad0effae..63771866d 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -1,12 +1,13 @@ package io.substrait.isthmus; import com.google.common.annotations.VisibleForTesting; -import io.substrait.isthmus.sql.SubstraitSqlToCalcite; +import io.substrait.isthmus.expression.DdlRelBuilder; import io.substrait.isthmus.sql.SubstraitSqlValidator; import io.substrait.plan.ImmutablePlan.Builder; import io.substrait.plan.Plan; import io.substrait.plan.Plan.Version; import io.substrait.plan.PlanProtoConverter; +import java.util.ArrayList; import java.util.List; import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgram; @@ -109,6 +110,49 @@ List sqlToRelNode(String sql, Prepare.CatalogReader catalogReader) return roots; } + protected void sqlToPlanRoots( + String sql, SqlValidator validator, Prepare.CatalogReader catalogReader, Builder builder) + throws SqlParseException { + + SqlParser parser = SqlParser.create(sql, parserConfig); + SqlNodeList parsedList = parser.parseStmtList(); + if (parsedList.isEmpty()) { + return; + } + + SqlToRelConverter converter = createSqlToRelConverter(validator, catalogReader); + DdlRelBuilder ddlRelBuilder = + new DdlRelBuilder( + converter, SqlToSubstrait::getBestExpRelRoot, EXTENSION_COLLECTION, featureBoard); + + List nonDdlNodes = new ArrayList<>(); + + for (SqlNode sqlNode : parsedList) { + final io.substrait.plan.Plan.Root ddlRoot = sqlNode.accept(ddlRelBuilder); + if (ddlRoot != null) { + builder.addRoots(ddlRoot); + } else { + nonDdlNodes.add(sqlNode); + } + } + + if (!nonDdlNodes.isEmpty()) { + SqlNodeList dmlNodes = new SqlNodeList(nonDdlNodes, parsedList.getParserPosition()); + + List relRoots = sqlNodesToRelNode(dmlNodes, converter); + relRoots.stream() + .map(root -> SubstraitRelVisitor.convert(root, EXTENSION_COLLECTION, featureBoard)) + .forEach(builder::addRoots); + } + } + + private List sqlNodesToRelNode( + final SqlNodeList parsedList, final SqlToRelConverter converter) { + return parsedList.stream() + .map(parsed -> getBestExpRelRoot(converter, parsed)) + .collect(java.util.stream.Collectors.toList()); + } + protected SqlToRelConverter createSqlToRelConverter( SqlValidator validator, Prepare.CatalogReader catalogReader) { SqlToRelConverter converter = @@ -122,7 +166,7 @@ protected SqlToRelConverter createSqlToRelConverter( return converter; } - protected RelRoot getBestExpRelRoot(SqlToRelConverter converter, SqlNode parsed) { + protected static RelRoot getBestExpRelRoot(SqlToRelConverter converter, SqlNode parsed) { RelRoot root = converter.convertQuery(parsed, true, true); { // RelBuilder seems to implicitly use the rule below, From e15643a8a8803a9340db315d07b2dd9c7f931b47 Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Wed, 20 Aug 2025 15:40:04 +0200 Subject: [PATCH 03/17] refactor(isthmus): introduce CacliteOperation to represent rel algebra and ddl --- .../isthmus/CalciteOperationToSubstrait.java | 79 ++++++ .../io/substrait/isthmus/SqlToSubstrait.java | 14 +- .../isthmus/SubstraitRelNodeConverter.java | 13 +- .../isthmus/SubstraitToCalciteOperation.java | 77 +++++ .../isthmus/operation/CalciteOperation.java | 7 + .../CalciteOperationBasicVisitor.java | 38 +++ .../operation/CalciteOperationBuilder.java | 69 +++++ .../operation/CalciteOperationVisitor.java | 17 ++ .../substrait/isthmus/operation/Create.java | 33 +++ .../isthmus/operation/CreateTableAs.java | 15 + .../isthmus/operation/CreateView.java | 15 + .../isthmus/operation/CreateWithInput.java | 27 ++ .../isthmus/operation/DdlOperation.java | 28 ++ .../operation/RelationalOperation.java | 20 ++ .../isthmus/operation/SqlKindFromRel.java | 264 ++++++++++++++++++ .../substrait/isthmus/DdlRoundtripTest.java | 40 ++- 16 files changed, 745 insertions(+), 11 deletions(-) create mode 100644 isthmus/src/main/java/io/substrait/isthmus/CalciteOperationToSubstrait.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalciteOperation.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperation.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBasicVisitor.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBuilder.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationVisitor.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/Create.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CreateTableAs.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CreateView.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CreateWithInput.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/DdlOperation.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/RelationalOperation.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/SqlKindFromRel.java diff --git a/isthmus/src/main/java/io/substrait/isthmus/CalciteOperationToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/CalciteOperationToSubstrait.java new file mode 100644 index 000000000..d21f20ff9 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/CalciteOperationToSubstrait.java @@ -0,0 +1,79 @@ +package io.substrait.isthmus; + +import io.substrait.expression.Expression; +import io.substrait.expression.ExpressionCreator; +import io.substrait.extension.SimpleExtension; +import io.substrait.isthmus.operation.CalciteOperationBasicVisitor; +import io.substrait.isthmus.operation.CreateTableAs; +import io.substrait.isthmus.operation.CreateView; +import io.substrait.isthmus.operation.RelationalOperation; +import io.substrait.plan.Plan; +import io.substrait.relation.AbstractDdlRel; +import io.substrait.relation.AbstractWriteRel; +import io.substrait.relation.NamedDdl; +import io.substrait.relation.NamedWrite; +import io.substrait.type.NamedStruct; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.type.RelDataType; + +public class CalciteOperationToSubstrait extends CalciteOperationBasicVisitor { + private final SimpleExtension.ExtensionCollection extensionCollection; + private final FeatureBoard featureBoard; + + public CalciteOperationToSubstrait( + SimpleExtension.ExtensionCollection extensionCollection, FeatureBoard featureBoard) { + this.extensionCollection = extensionCollection; + this.featureBoard = featureBoard; + } + + private NamedStruct getSchema(final RelRoot queryRelRoot) { + final RelDataType rowType = queryRelRoot.rel.getRowType(); + + final TypeConverter typeConverter = TypeConverter.DEFAULT; + return typeConverter.toNamedStruct(rowType); + } + + @Override + public Plan.Root visit(RelationalOperation relationalOperation) { + return SubstraitRelVisitor.convert( + relationalOperation.getRelRoot(), extensionCollection, featureBoard); + } + + @Override + public Plan.Root visit(CreateTableAs createTableAs) { + RelRoot input = createTableAs.getInput(); + Plan.Root rel = SubstraitRelVisitor.convert(input, extensionCollection, featureBoard); + NamedStruct schema = getSchema(input); + + NamedWrite namedWrite = + NamedWrite.builder() + .input(rel.getInput()) + .tableSchema(schema) + .operation(AbstractWriteRel.WriteOp.CTAS) + .createMode(AbstractWriteRel.CreateMode.REPLACE_IF_EXISTS) + .outputMode(AbstractWriteRel.OutputMode.NO_OUTPUT) + .names(createTableAs.getNames()) + .build(); + + return Plan.Root.builder().input(namedWrite).build(); + } + + @Override + public Plan.Root visit(CreateView createView) { + RelRoot input = createView.getInput(); + Plan.Root rel = SubstraitRelVisitor.convert(input, extensionCollection, featureBoard); + final Expression.StructLiteral defaults = ExpressionCreator.struct(false); + + final NamedDdl namedDdl = + NamedDdl.builder() + .viewDefinition(rel.getInput()) + .tableSchema(getSchema(input)) + .tableDefaults(defaults) + .operation(AbstractDdlRel.DdlOp.CREATE) + .object(AbstractDdlRel.DdlObject.VIEW) + .names(createView.getNames()) + .build(); + + return Plan.Root.builder().input(namedDdl).build(); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index 63771866d..7f6b4ea59 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -7,7 +7,6 @@ import io.substrait.plan.Plan; import io.substrait.plan.Plan.Version; import io.substrait.plan.PlanProtoConverter; -import java.util.ArrayList; import java.util.List; import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgram; @@ -110,7 +109,18 @@ List sqlToRelNode(String sql, Prepare.CatalogReader catalogReader) return roots; } - protected void sqlToPlanRoots( + void sqlToPlanRoots( + String sql, SqlValidator validator, Prepare.CatalogReader catalogReader, Builder builder) + throws SqlParseException { + List calciteOperations = sqlToCalciteOperation(sql, validator, catalogReader); + CalciteOperationToSubstrait calciteOperationToSubstrait = + new CalciteOperationToSubstrait(EXTENSION_COLLECTION, featureBoard); + calciteOperations.stream() + .map(operation -> operation.accept(calciteOperationToSubstrait)) + .forEach(builder::addRoots); + } + + void sqlToPlanRoots1( String sql, SqlValidator validator, Prepare.CatalogReader catalogReader, Builder builder) throws SqlParseException { diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java index 801110c5f..d1ce8a13b 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java @@ -588,11 +588,8 @@ public RelNode visit(VirtualTableScan virtualTableScan, Context context) { public RelNode visit(NamedWrite write, Context context) { RelNode input = write.getInput().accept(this, context); assert relBuilder.getRelOptSchema() != null; - final RelOptTable table = relBuilder.getRelOptSchema().getTableForMember(write.getNames()); - - if (table == null) { - throw new IllegalStateException("Table not found in Calcite catalog: " + write.getNames()); - } + final RelOptTable targetTable = + relBuilder.getRelOptSchema().getTableForMember(write.getNames()); TableModify.Operation operation; switch (write.getOperation()) { @@ -610,8 +607,12 @@ public RelNode visit(NamedWrite write, Context context) { + "Check if a more specific relation type (e.g., NamedUpdate) should be used."); } + if (targetTable == null) { + throw new IllegalStateException("Table not found in Calcite catalog: " + write.getNames()); + } + return LogicalTableModify.create( - table, + targetTable, (Prepare.CatalogReader) relBuilder.getRelOptSchema(), input, operation, diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalciteOperation.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalciteOperation.java new file mode 100644 index 000000000..6399f54ed --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalciteOperation.java @@ -0,0 +1,77 @@ +package io.substrait.isthmus; + +import io.substrait.extension.SimpleExtension; +import io.substrait.isthmus.operation.CalciteOperation; +import io.substrait.isthmus.operation.CreateTableAs; +import io.substrait.isthmus.operation.CreateView; +import io.substrait.isthmus.operation.RelationalOperation; +import io.substrait.isthmus.operation.SqlKindFromRel; +import io.substrait.relation.AbstractRelVisitor; +import io.substrait.relation.NamedDdl; +import io.substrait.relation.NamedWrite; +import io.substrait.relation.Rel; +import io.substrait.util.EmptyVisitationContext; +import java.util.List; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.prepare.Prepare; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.RelBuilder; + +public class SubstraitToCalciteOperation + extends AbstractRelVisitor< + CalciteOperation, SubstraitRelNodeConverter.Context, RuntimeException> { + + private final SubstraitRelNodeConverter substraitRelNodeConverter; + + public SubstraitToCalciteOperation( + SimpleExtension.ExtensionCollection extensionCollection, + RelDataTypeFactory relDataTypeFactory, + Prepare.CatalogReader catalogReader, + SqlParser.Config parserConfig) { + + RelBuilder relBuilder = + RelBuilder.create( + Frameworks.newConfigBuilder() + .parserConfig(parserConfig) + .defaultSchema(catalogReader.getRootSchema().plus()) + .traitDefs((List) null) + .programs() + .build()); + this.substraitRelNodeConverter = + new SubstraitRelNodeConverter(extensionCollection, relDataTypeFactory, relBuilder); + } + + @Override + public CalciteOperation visit(NamedDdl namedDdl, SubstraitRelNodeConverter.Context ctx) { + if (namedDdl.getViewDefinition().isEmpty()) { + throw new IllegalArgumentException("no view definition found"); + } + Rel viewDefinition = namedDdl.getViewDefinition().get(); + RelNode relNode = viewDefinition.accept(substraitRelNodeConverter, ctx); + RelRoot relRoot = RelRoot.of(relNode, SqlKind.CREATE_VIEW); + + return new CreateView(namedDdl.getNames(), relRoot); + } + + @Override + public CalciteOperation visit(NamedWrite namedWrite, SubstraitRelNodeConverter.Context ctx) { + Rel input = namedWrite.getInput(); + RelNode relNode = input.accept(substraitRelNodeConverter, ctx); + RelRoot relRoot = RelRoot.of(relNode, SqlKind.CREATE_TABLE); + return new CreateTableAs(namedWrite.getNames(), relRoot); + } + + @Override + public CalciteOperation visitFallback(Rel rel, SubstraitRelNodeConverter.Context context) { + SqlKindFromRel sqlKindFromRel = new SqlKindFromRel(); + SqlKind kind = rel.accept(sqlKindFromRel, EmptyVisitationContext.INSTANCE); + RelNode relNode = rel.accept(substraitRelNodeConverter, context); + RelRoot relRoot = RelRoot.of(relNode, kind); + return new RelationalOperation(relRoot); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperation.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperation.java new file mode 100644 index 000000000..96f43c841 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperation.java @@ -0,0 +1,7 @@ +package io.substrait.isthmus.operation; + +public class CalciteOperation { + public R accept(CalciteOperationVisitor visitor) { + return visitor.visit(this); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBasicVisitor.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBasicVisitor.java new file mode 100644 index 000000000..dee4c9fdb --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBasicVisitor.java @@ -0,0 +1,38 @@ +package io.substrait.isthmus.operation; + +public class CalciteOperationBasicVisitor implements CalciteOperationVisitor { + @Override + public R visit(CalciteOperation operation) { + return null; + } + + @Override + public R visit(RelationalOperation relationalOperation) { + return visit((CalciteOperation) relationalOperation); + } + + @Override + public R visit(DdlOperation ddlOperation) { + return visit((CalciteOperation) ddlOperation); + } + + @Override + public R visit(CreateWithInput createWithInput) { + return visit((Create) createWithInput); + } + + @Override + public R visit(Create create) { + return visit((DdlOperation) create); + } + + @Override + public R visit(CreateTableAs createTableAs) { + return visit((CreateWithInput) createTableAs); + } + + @Override + public R visit(CreateView createView) { + return visit((CreateWithInput) createView); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBuilder.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBuilder.java new file mode 100644 index 000000000..74c0f22af --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBuilder.java @@ -0,0 +1,69 @@ +package io.substrait.isthmus.operation; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.BiFunction; +import java.util.function.Function; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.ddl.SqlCreateTable; +import org.apache.calcite.sql.ddl.SqlCreateView; +import org.apache.calcite.sql.util.SqlBasicVisitor; +import org.apache.calcite.sql2rel.SqlToRelConverter; + +public class CalciteOperationBuilder extends SqlBasicVisitor { + protected final Map, Function> ddlHandlers = + new ConcurrentHashMap<>(); + private final SqlToRelConverter converter; + private final BiFunction bestExprRelRootBuilder; + + public CalciteOperationBuilder( + final SqlToRelConverter converter, + BiFunction bestExprRelRootBuilder) { + this.converter = converter; + this.bestExprRelRootBuilder = bestExprRelRootBuilder; + + ddlHandlers.put(SqlCreateTable.class, sqlCall -> handleCreateTable((SqlCreateTable) sqlCall)); + ddlHandlers.put(SqlCreateView.class, sqlCall -> handleCreateView((SqlCreateView) sqlCall)); + } + + private Function findDdlHandler(final SqlCall call) { + Class currentClass = call.getClass(); + while (SqlCall.class.isAssignableFrom(currentClass)) { + final Function found = ddlHandlers.get(currentClass); + if (found != null) { + return found; + } + currentClass = currentClass.getSuperclass(); + } + return null; + } + + @Override + public CalciteOperation visit(final SqlCall sqlCall) { + Function ddlHandler = findDdlHandler(sqlCall); + if (ddlHandler != null) { + return ddlHandler.apply(sqlCall); + } + return handleRelationalOperation(sqlCall); + } + + protected CalciteOperation handleRelationalOperation(final SqlNode sqlNode) { + return new RelationalOperation(bestExprRelRootBuilder.apply(converter, sqlNode)); + } + + protected CalciteOperation handleCreateTable(final SqlCreateTable sqlCreateTable) { + if (sqlCreateTable.query == null) { + throw new IllegalArgumentException("Only create table as select statements are supported"); + } + + final RelRoot queryRelRoot = bestExprRelRootBuilder.apply(converter, sqlCreateTable.query); + return new CreateTableAs(sqlCreateTable.name.names, queryRelRoot); + } + + protected CalciteOperation handleCreateView(final SqlCreateView sqlCreateView) { + final RelRoot queryRelRoot = bestExprRelRootBuilder.apply(converter, sqlCreateView.query); + return new CreateView(sqlCreateView.name.names, queryRelRoot); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationVisitor.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationVisitor.java new file mode 100644 index 000000000..6b69523b2 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationVisitor.java @@ -0,0 +1,17 @@ +package io.substrait.isthmus.operation; + +public interface CalciteOperationVisitor { + R visit(CalciteOperation operation); + + R visit(RelationalOperation relationalOperation); + + R visit(DdlOperation ddlOperation); + + R visit(CreateWithInput createWithInput); + + R visit(Create create); + + R visit(CreateTableAs createTableAs); + + R visit(CreateView createView); +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/Create.java b/isthmus/src/main/java/io/substrait/isthmus/operation/Create.java new file mode 100644 index 000000000..dbae7a86b --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/operation/Create.java @@ -0,0 +1,33 @@ +package io.substrait.isthmus.operation; + +import java.util.List; + +public abstract class Create extends DdlOperation { + private final List names; + private final RelationType relationType; + + public Create(RelationType relationType, List names) { + this.relationType = relationType; + this.names = names; + } + + @Override + public List getNames() { + return names; + } + + @Override + public RelationType getRelationType() { + return relationType; + } + + @Override + public Operation getOperation() { + return Operation.CREATE; + } + + @Override + public R accept(CalciteOperationVisitor visitor) { + return visitor.visit(this); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CreateTableAs.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CreateTableAs.java new file mode 100644 index 000000000..15c05fcf0 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/operation/CreateTableAs.java @@ -0,0 +1,15 @@ +package io.substrait.isthmus.operation; + +import java.util.List; +import org.apache.calcite.rel.RelRoot; + +public class CreateTableAs extends CreateWithInput { + public CreateTableAs(List names, RelRoot input) { + super(RelationType.TABLE, names, input); + } + + @Override + public R accept(CalciteOperationVisitor visitor) { + return visitor.visit(this); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CreateView.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CreateView.java new file mode 100644 index 000000000..d30998fa4 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/operation/CreateView.java @@ -0,0 +1,15 @@ +package io.substrait.isthmus.operation; + +import java.util.List; +import org.apache.calcite.rel.RelRoot; + +public class CreateView extends CreateWithInput { + public CreateView(List names, RelRoot input) { + super(RelationType.VIEW, names, input); + } + + @Override + public R accept(CalciteOperationVisitor visitor) { + return visitor.visit(this); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CreateWithInput.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CreateWithInput.java new file mode 100644 index 000000000..685f0c5f1 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/operation/CreateWithInput.java @@ -0,0 +1,27 @@ +package io.substrait.isthmus.operation; + +import java.util.List; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.type.RelDataType; + +public abstract class CreateWithInput extends Create { + private final RelRoot input; + + public CreateWithInput(RelationType relationType, List names, RelRoot input) { + super(relationType, names); + this.input = input; + } + + public RelRoot getInput() { + return input; + } + + public RelDataType getRowType() { + return input.rel.getRowType(); + } + + @Override + public R accept(CalciteOperationVisitor visitor) { + return visitor.visit(this); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/DdlOperation.java b/isthmus/src/main/java/io/substrait/isthmus/operation/DdlOperation.java new file mode 100644 index 000000000..b1419d814 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/operation/DdlOperation.java @@ -0,0 +1,28 @@ +package io.substrait.isthmus.operation; + +import java.util.List; + +public abstract class DdlOperation extends CalciteOperation { + public enum RelationType { + TABLE, + VIEW, + MATERIALIZED_VIEW + // TODO: , SCHEMA + } + + public enum Operation { + CREATE, + DROP + } + + public abstract List getNames(); + + public abstract RelationType getRelationType(); + + public abstract Operation getOperation(); + + @Override + public R accept(CalciteOperationVisitor visitor) { + return visitor.visit(this); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/RelationalOperation.java b/isthmus/src/main/java/io/substrait/isthmus/operation/RelationalOperation.java new file mode 100644 index 000000000..e89c08b89 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/operation/RelationalOperation.java @@ -0,0 +1,20 @@ +package io.substrait.isthmus.operation; + +import org.apache.calcite.rel.RelRoot; + +public class RelationalOperation extends CalciteOperation { + private final RelRoot relRoot; + + public RelationalOperation(RelRoot relRoot) { + this.relRoot = relRoot; + } + + public RelRoot getRelRoot() { + return relRoot; + } + + @Override + public R accept(CalciteOperationVisitor visitor) { + return visitor.visit(this); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/SqlKindFromRel.java b/isthmus/src/main/java/io/substrait/isthmus/operation/SqlKindFromRel.java new file mode 100644 index 000000000..4eb5b69f8 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/operation/SqlKindFromRel.java @@ -0,0 +1,264 @@ +package io.substrait.isthmus.operation; + +import io.substrait.relation.Aggregate; +import io.substrait.relation.ConsistentPartitionWindow; +import io.substrait.relation.Cross; +import io.substrait.relation.EmptyScan; +import io.substrait.relation.Expand; +import io.substrait.relation.ExtensionDdl; +import io.substrait.relation.ExtensionLeaf; +import io.substrait.relation.ExtensionMulti; +import io.substrait.relation.ExtensionSingle; +import io.substrait.relation.ExtensionTable; +import io.substrait.relation.ExtensionWrite; +import io.substrait.relation.Fetch; +import io.substrait.relation.Filter; +import io.substrait.relation.Join; +import io.substrait.relation.LocalFiles; +import io.substrait.relation.NamedDdl; +import io.substrait.relation.NamedScan; +import io.substrait.relation.NamedUpdate; +import io.substrait.relation.NamedWrite; +import io.substrait.relation.Project; +import io.substrait.relation.RelVisitor; +import io.substrait.relation.Set; +import io.substrait.relation.Sort; +import io.substrait.relation.VirtualTableScan; +import io.substrait.relation.physical.HashJoin; +import io.substrait.relation.physical.MergeJoin; +import io.substrait.relation.physical.NestedLoopJoin; +import io.substrait.util.EmptyVisitationContext; +import org.apache.calcite.sql.SqlKind; + +/** + * A visitor to infer the general SqlKind from the root of a Substrait Rel tree. Note: This infers + * the general operation type, as the original SQL syntax is not preserved in the Substrait plan. + */ +public class SqlKindFromRel + implements RelVisitor { + + // Most common query operations map to SELECT. + private static final SqlKind QUERY_KIND = SqlKind.SELECT; + + @Override + public SqlKind visit(Aggregate aggregate, EmptyVisitationContext context) + throws RuntimeException { + // Aggregation is a core part of a query (DQL). + return QUERY_KIND; + } + + @Override + public SqlKind visit(EmptyScan emptyScan, EmptyVisitationContext context) + throws RuntimeException { + // An empty scan is typically the result of a query that returns no rows. + return QUERY_KIND; + } + + @Override + public SqlKind visit(Fetch fetch, EmptyVisitationContext context) throws RuntimeException { + // Fetch corresponds to LIMIT/OFFSET, a part of a query. + return QUERY_KIND; + } + + @Override + public SqlKind visit(Filter filter, EmptyVisitationContext context) throws RuntimeException { + // Filter corresponds to a WHERE clause, a part of a query. + return QUERY_KIND; + } + + @Override + public SqlKind visit(Join join, EmptyVisitationContext context) throws RuntimeException { + // A logical join operation. + return SqlKind.JOIN; + } + + @Override + public SqlKind visit(Set set, EmptyVisitationContext context) throws RuntimeException { + // Maps Substrait's Set operations to their SQL equivalents. + switch (set.getSetOp()) { + case UNION_ALL: + case UNION_DISTINCT: + return SqlKind.UNION; + case INTERSECTION_PRIMARY: + case INTERSECTION_MULTISET: + case INTERSECTION_MULTISET_ALL: + return SqlKind.INTERSECT; + case MINUS_PRIMARY: + case MINUS_PRIMARY_ALL: + case MINUS_MULTISET: + return SqlKind.EXCEPT; + case UNKNOWN: + default: + return SqlKind.OTHER; + } + } + + @Override + public SqlKind visit(NamedScan namedScan, EmptyVisitationContext context) + throws RuntimeException { + // A scan from a named table is the start of a query. + return QUERY_KIND; + } + + @Override + public SqlKind visit(LocalFiles localFiles, EmptyVisitationContext context) + throws RuntimeException { + // A scan from local files is a type of query input. + return QUERY_KIND; + } + + @Override + public SqlKind visit(Project project, EmptyVisitationContext context) throws RuntimeException { + // Project corresponds to the SELECT clause of a query. + return QUERY_KIND; + } + + @Override + public SqlKind visit(Expand expand, EmptyVisitationContext context) throws RuntimeException { + // Expand is a relational operator used in queries (e.g., for grouping sets). + return QUERY_KIND; + } + + @Override + public SqlKind visit(Sort sort, EmptyVisitationContext context) throws RuntimeException { + // A sort operation directly maps to ORDER BY. + return SqlKind.ORDER_BY; + } + + @Override + public SqlKind visit(Cross cross, EmptyVisitationContext context) throws RuntimeException { + // A cross join is a type of join. + return SqlKind.JOIN; + } + + @Override + public SqlKind visit(VirtualTableScan virtualTableScan, EmptyVisitationContext context) + throws RuntimeException { + // A virtual table scan corresponds to a VALUES clause. + return SqlKind.VALUES; + } + + @Override + public SqlKind visit(ExtensionLeaf extensionLeaf, EmptyVisitationContext context) + throws RuntimeException { + // Unknown extension node. + return SqlKind.OTHER; + } + + @Override + public SqlKind visit(ExtensionSingle extensionSingle, EmptyVisitationContext context) + throws RuntimeException { + // Unknown extension node. + return SqlKind.OTHER; + } + + @Override + public SqlKind visit(ExtensionMulti extensionMulti, EmptyVisitationContext context) + throws RuntimeException { + // Unknown extension node. + return SqlKind.OTHER; + } + + @Override + public SqlKind visit(ExtensionTable extensionTable, EmptyVisitationContext context) + throws RuntimeException { + // Unknown extension node. + return SqlKind.OTHER; + } + + @Override + public SqlKind visit(HashJoin hashJoin, EmptyVisitationContext context) throws RuntimeException { + // A physical hash join is a type of join. + return SqlKind.JOIN; + } + + @Override + public SqlKind visit(MergeJoin mergeJoin, EmptyVisitationContext context) + throws RuntimeException { + // A physical merge join is a type of join. + return SqlKind.JOIN; + } + + @Override + public SqlKind visit(NestedLoopJoin nestedLoopJoin, EmptyVisitationContext context) + throws RuntimeException { + // A physical nested loop join is a type of join. + return SqlKind.JOIN; + } + + @Override + public SqlKind visit( + ConsistentPartitionWindow consistentPartitionWindow, EmptyVisitationContext context) + throws RuntimeException { + // This relation represents a window function operation. + return SqlKind.OVER; + } + + @Override + public SqlKind visit(NamedWrite write, EmptyVisitationContext context) throws RuntimeException { + // DML and CTAS write operations. + switch (write.getOperation()) { + case INSERT: + return SqlKind.INSERT; + case DELETE: + return SqlKind.DELETE; + case UPDATE: + return SqlKind.UPDATE; + case CTAS: + return SqlKind.CREATE_TABLE; + default: + return SqlKind.OTHER; + } + } + + @Override + public SqlKind visit(ExtensionWrite write, EmptyVisitationContext context) + throws RuntimeException { + // Custom DML/DDL operations are best mapped to OTHER or OTHER_DDL. + return SqlKind.OTHER_DDL; + } + + @Override + public SqlKind visit(NamedDdl ddl, EmptyVisitationContext context) throws RuntimeException { + // DDL operations, determined by combining the operation and the object type. + switch (ddl.getOperation()) { + case CREATE: + case CREATE_OR_REPLACE: + if (ddl.getObject() == NamedDdl.DdlObject.TABLE) { + return SqlKind.CREATE_TABLE; + } else if (ddl.getObject() == NamedDdl.DdlObject.VIEW) { + return SqlKind.CREATE_VIEW; + } + break; + case DROP: + case DROP_IF_EXIST: + if (ddl.getObject() == NamedDdl.DdlObject.TABLE) { + return SqlKind.DROP_TABLE; + } else if (ddl.getObject() == NamedDdl.DdlObject.VIEW) { + return SqlKind.DROP_VIEW; + } + break; + case ALTER: + if (ddl.getObject() == NamedDdl.DdlObject.TABLE) { + return SqlKind.ALTER_TABLE; + } else if (ddl.getObject() == NamedDdl.DdlObject.VIEW) { + return SqlKind.ALTER_VIEW; + } + break; + } + return SqlKind.OTHER_DDL; + } + + @Override + public SqlKind visit(ExtensionDdl ddl, EmptyVisitationContext context) throws RuntimeException { + // A custom DDL operation. + return SqlKind.OTHER_DDL; + } + + @Override + public SqlKind visit(NamedUpdate update, EmptyVisitationContext context) throws RuntimeException { + // An update operation is a specific DML type. Note that modern Substrait + // producers should use NamedWrite with WriteOp.UPDATE instead. + return SqlKind.UPDATE; + } +} diff --git a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java index 579fc938d..932f657b8 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java @@ -2,10 +2,12 @@ import static org.junit.jupiter.api.Assertions.assertEquals; +import io.substrait.isthmus.operation.CalciteOperation; import io.substrait.isthmus.sql.SubstraitCreateStatementParser; import io.substrait.plan.Plan; import io.substrait.plan.PlanProtoConverter; import io.substrait.plan.ProtoPlanConverter; +import io.substrait.relation.Rel; import org.apache.calcite.prepare.Prepare; import org.apache.calcite.sql.parser.SqlParseException; import org.junit.jupiter.api.Test; @@ -28,15 +30,47 @@ void testSqlToSubstrait(String sqlStatement) throws SqlParseException { assertEquals(protoPlan, protoPlan1); } - void testPlanRoundTrip(String sqlStatement) throws SqlParseException { + protected void assertCalciteOperationSubstraitRelRoundTrip( + String query, Prepare.CatalogReader catalogReader) throws Exception { + // sql <--> substrait round trip test. + // Assert (sql -> calcite -> substrait) and (sql -> substrait -> calcite -> substrait) are same. + // Return list of sql -> Substrait rel -> Calcite rel. + SqlToSubstrait s = new SqlToSubstrait(); + + // 1. SQL -> Substrait Plan + Plan plan1 = s.convert(query, catalogReader); + + // 2. Substrait Plan -> Substrait Rel + Plan.Root pojo1 = plan1.getRoots().get(0); + + // 3. Substrait Rel -> CalciteOperation + + SubstraitToCalciteOperation substraitToCalciteOperation = + new SubstraitToCalciteOperation(extensions, typeFactory, catalogReader, s.parserConfig); + Rel rel = plan1.getRoots().get(0).getInput(); + CalciteOperation calciteOperation = + rel.accept(substraitToCalciteOperation, SubstraitRelNodeConverter.Context.newContext()); + + // 4. CalciteOperation -> Substrait Rel + CalciteOperationToSubstrait calciteOperationToSubstrait = + new CalciteOperationToSubstrait(extensions, s.featureBoard); + Plan.Root pojo2 = calciteOperation.accept(calciteOperationToSubstrait); + + assertEquals(pojo1, pojo2); + } + + void testPlanRoundTrip(String sqlStatement) throws Exception { SqlToSubstrait sql2subst = new SqlToSubstrait(); final Plan plan = sql2subst.convert(sqlStatement, catalogReader); assertPlanRoundtrip(plan); + assertCalciteOperationSubstraitRelRoundTrip(sqlStatement, catalogReader); + + assertFullRoundTrip(sqlStatement, catalogReader); } @Test - void testCreateTable() throws SqlParseException { + void testCreateTable() throws Exception { String sql = "create table dst1 as select * from src1"; testSqlToSubstrait(sql); // TBD: full roundtrip is not possible because there is no relational algebra for DDL @@ -44,7 +78,7 @@ void testCreateTable() throws SqlParseException { } @Test - void testCreateView() throws SqlParseException { + void testCreateView() throws Exception { String sql = "create view dst1 as select * from src1"; testSqlToSubstrait(sql); testPlanRoundTrip(sql); From de25ac66242ef5e475ef73b3a3008c63e33be498 Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Thu, 21 Aug 2025 17:24:23 +0200 Subject: [PATCH 04/17] chore(isthmus): rebase + refactoring --- .../io/substrait/isthmus/SqlToSubstrait.java | 133 +--------- .../sql/SubstraitSqlStatementParser.java | 14 + .../isthmus/sql/SubstraitSqlToCalcite.java | 248 +++++++++++++----- .../substrait/isthmus/DdlRoundtripTest.java | 2 +- .../substrait/isthmus/NameRoundtripTest.java | 2 +- .../isthmus/OptimizerIntegrationTest.java | 2 +- .../io/substrait/isthmus/PlanTestBase.java | 2 +- 7 files changed, 211 insertions(+), 192 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index 7f6b4ea59..1d1398a1a 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -1,25 +1,14 @@ package io.substrait.isthmus; -import com.google.common.annotations.VisibleForTesting; -import io.substrait.isthmus.expression.DdlRelBuilder; -import io.substrait.isthmus.sql.SubstraitSqlValidator; +import io.substrait.isthmus.operation.CalciteOperation; +import io.substrait.isthmus.sql.SubstraitSqlToCalcite; import io.substrait.plan.ImmutablePlan.Builder; import io.substrait.plan.Plan; import io.substrait.plan.Plan.Version; import io.substrait.plan.PlanProtoConverter; import java.util.List; -import org.apache.calcite.plan.hep.HepPlanner; -import org.apache.calcite.plan.hep.HepProgram; import org.apache.calcite.prepare.Prepare; -import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.rel.rules.CoreRules; -import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.SqlNodeList; import org.apache.calcite.sql.parser.SqlParseException; -import org.apache.calcite.sql.parser.SqlParser; -import org.apache.calcite.sql.validate.SqlValidator; -import org.apache.calcite.sql2rel.SqlToRelConverter; -import org.apache.calcite.sql2rel.StandardConvertletTable; /** Take a SQL statement and a set of table definitions and return a substrait plan. */ public class SqlToSubstrait extends SqlConverterBase { @@ -66,126 +55,14 @@ public Plan convert(String sqlStatements, Prepare.CatalogReader catalogReader) builder.version(Version.builder().from(Version.DEFAULT_VERSION).producer("isthmus").build()); // TODO: consider case in which one sql passes conversion while others don't - sqlToRelNode(sqlStatements, catalogReader).stream() - .map(root -> SubstraitRelVisitor.convert(root, EXTENSION_COLLECTION, featureBoard)) - .forEach(root -> builder.addRoots(root)); - - return builder.build(); - } - - /** - * Converts one or more SQL statements into a Substrait {@link Plan}. - * - * @param sqlStatements a string containing one more SQL statements - * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in - * the SQL statements - * @return the Substrait {@link Plan} - * @throws SqlParseException if there is an error while parsing the SQL statements - */ - public Plan convertNew(String sqlStatements, Prepare.CatalogReader catalogReader) - throws SqlParseException { - Builder builder = io.substrait.plan.Plan.builder(); - builder.version(Version.builder().from(Version.DEFAULT_VERSION).producer("isthmus").build()); - - // TODO: consider case in which one sql passes conversion while others don't - SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader).stream() - .map(root -> SubstraitRelVisitor.convert(root, EXTENSION_COLLECTION, featureBoard)) - .forEach(root -> builder.addRoots(root)); - - return builder.build(); - } - - @VisibleForTesting - List sqlToRelNode(String sql, Prepare.CatalogReader catalogReader) - throws SqlParseException { - SqlValidator validator = new SubstraitSqlValidator(catalogReader); - SqlParser parser = SqlParser.create(sql, parserConfig); - SqlNodeList parsedList = parser.parseStmtList(); - SqlToRelConverter converter = createSqlToRelConverter(validator, catalogReader); - List roots = - parsedList.stream() - .map(parsed -> getBestExpRelRoot(converter, parsed)) - .collect(java.util.stream.Collectors.toList()); - return roots; - } - - void sqlToPlanRoots( - String sql, SqlValidator validator, Prepare.CatalogReader catalogReader, Builder builder) - throws SqlParseException { - List calciteOperations = sqlToCalciteOperation(sql, validator, catalogReader); + List calciteOperations = + SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader); CalciteOperationToSubstrait calciteOperationToSubstrait = new CalciteOperationToSubstrait(EXTENSION_COLLECTION, featureBoard); calciteOperations.stream() .map(operation -> operation.accept(calciteOperationToSubstrait)) .forEach(builder::addRoots); - } - - void sqlToPlanRoots1( - String sql, SqlValidator validator, Prepare.CatalogReader catalogReader, Builder builder) - throws SqlParseException { - - SqlParser parser = SqlParser.create(sql, parserConfig); - SqlNodeList parsedList = parser.parseStmtList(); - if (parsedList.isEmpty()) { - return; - } - - SqlToRelConverter converter = createSqlToRelConverter(validator, catalogReader); - DdlRelBuilder ddlRelBuilder = - new DdlRelBuilder( - converter, SqlToSubstrait::getBestExpRelRoot, EXTENSION_COLLECTION, featureBoard); - - List nonDdlNodes = new ArrayList<>(); - for (SqlNode sqlNode : parsedList) { - final io.substrait.plan.Plan.Root ddlRoot = sqlNode.accept(ddlRelBuilder); - if (ddlRoot != null) { - builder.addRoots(ddlRoot); - } else { - nonDdlNodes.add(sqlNode); - } - } - - if (!nonDdlNodes.isEmpty()) { - SqlNodeList dmlNodes = new SqlNodeList(nonDdlNodes, parsedList.getParserPosition()); - - List relRoots = sqlNodesToRelNode(dmlNodes, converter); - relRoots.stream() - .map(root -> SubstraitRelVisitor.convert(root, EXTENSION_COLLECTION, featureBoard)) - .forEach(builder::addRoots); - } - } - - private List sqlNodesToRelNode( - final SqlNodeList parsedList, final SqlToRelConverter converter) { - return parsedList.stream() - .map(parsed -> getBestExpRelRoot(converter, parsed)) - .collect(java.util.stream.Collectors.toList()); - } - - protected SqlToRelConverter createSqlToRelConverter( - SqlValidator validator, Prepare.CatalogReader catalogReader) { - SqlToRelConverter converter = - new SqlToRelConverter( - null, - validator, - catalogReader, - relOptCluster, - StandardConvertletTable.INSTANCE, - converterConfig); - return converter; - } - - protected static RelRoot getBestExpRelRoot(SqlToRelConverter converter, SqlNode parsed) { - RelRoot root = converter.convertQuery(parsed, true, true); - { - // RelBuilder seems to implicitly use the rule below, - // need to add to avoid discrepancies in assertFullRoundTrip - HepProgram program = HepProgram.builder().addRuleInstance(CoreRules.PROJECT_REMOVE).build(); - HepPlanner hepPlanner = new HepPlanner(program); - hepPlanner.setRoot(root.rel); - root = root.withRel(hepPlanner.findBestExp()); - } - return root; + return builder.build(); } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java index 4376a3eba..a4d1de274 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java @@ -31,4 +31,18 @@ public static List parseStatements(String sqlStatements) throws SqlPars SqlParser parser = SqlParser.create(sqlStatements, PARSER_CONFIG); return parser.parseStmtList(); } + + /** + * Parse one or more SQL statements to a list of {@link SqlNode}s. + * + * @param sqlStatements a string containing one or more SQL statements + * @param parserConfig sql parser configuration + * @return a list of {@link SqlNode}s corresponding to the given statements + * @throws SqlParseException if there is an error while parsing the SQL statements + */ + public static List parseStatements(String sqlStatements, SqlParser.Config parserConfig) + throws SqlParseException { + SqlParser parser = SqlParser.create(sqlStatements, parserConfig); + return parser.parseStmtList(); + } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java index 499f33f1d..a72f4ab9c 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java @@ -1,12 +1,16 @@ package io.substrait.isthmus.sql; +import io.substrait.isthmus.FeatureBoard; +import io.substrait.isthmus.ImmutableFeatureBoard; import io.substrait.isthmus.SubstraitTypeSystem; +import io.substrait.isthmus.operation.CalciteOperation; +import io.substrait.isthmus.operation.CalciteOperationBuilder; +import io.substrait.isthmus.operation.RelationalOperation; import java.util.List; import java.util.stream.Collectors; import org.apache.calcite.jdbc.JavaTypeFactoryImpl; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptPlanner; -import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgram; import org.apache.calcite.prepare.Prepare; @@ -16,6 +20,9 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.parser.SqlParseException; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.parser.ddl.SqlDdlParserImpl; +import org.apache.calcite.sql.validate.SqlConformanceEnum; import org.apache.calcite.sql.validate.SqlValidator; import org.apache.calcite.sql2rel.SqlToRelConverter; import org.apache.calcite.sql2rel.StandardConvertletTable; @@ -26,6 +33,62 @@ */ public class SubstraitSqlToCalcite { + protected static SqlParser.Config createDefaultParserConfig() { + return SqlParser.Config.DEFAULT + .withUnquotedCasing(createDefaultFeatureBoard().unquotedCasing()) + .withParserFactory(SqlDdlParserImpl.FACTORY) + .withConformance(SqlConformanceEnum.LENIENT); + } + + protected static FeatureBoard createDefaultFeatureBoard() { + return ImmutableFeatureBoard.builder().build(); + } + + protected static SqlToRelConverter createSqlToRelConverter( + SqlValidator validator, Prepare.CatalogReader catalogReader, RelOptCluster relOptCluster) { + return new SqlToRelConverter( + null, + validator, + catalogReader, + relOptCluster, + StandardConvertletTable.INSTANCE, + SqlToRelConverter.CONFIG); + } + + protected static RelRoot parseRelationalExpression(SqlToRelConverter converter, SqlNode sqlNode) { + RelRoot converted = converter.convertQuery(sqlNode, true, true); + return converted.withRel(removeRedundantProjects(converted.rel)); + } + + protected static RelOptCluster createDefaultRelOptCluster() { + RexBuilder rexBuilder = + new RexBuilder(new JavaTypeFactoryImpl(SubstraitTypeSystem.TYPE_SYSTEM)); + HepProgram program = HepProgram.builder().build(); + RelOptPlanner emptyPlanner = new HepPlanner(program); + return RelOptCluster.create(emptyPlanner, rexBuilder); + } + + protected static RelNode removeRedundantProjects(RelNode root) { + // The Calcite RelBuilder, when constructing Project that does not modify its inputs in any way, + // simply elides it. The PROJECT_REMOVE rule can be used to remove such projects from Rel trees. + // This facilitates roundtrip testing. + HepProgram program = HepProgram.builder().addRuleInstance(CoreRules.PROJECT_REMOVE).build(); + HepPlanner planner = new HepPlanner(program); + planner.setRoot(root); + return planner.findBestExp(); + } + + protected static CalciteOperation convert( + SqlNode sqlNode, + Prepare.CatalogReader catalogReader, + SqlValidator validator, + RelOptCluster cluster) { + SqlToRelConverter converter = createSqlToRelConverter(validator, catalogReader, cluster); + CalciteOperationBuilder operationBuilder = + new CalciteOperationBuilder(converter, SubstraitSqlToCalcite::parseRelationalExpression); + return sqlNode.accept(operationBuilder); + } + /** * Converts a SQL statement to a Calcite {@link RelRoot}. * @@ -35,41 +98,119 @@ public class SubstraitSqlToCalcite { * @return a {@link RelRoot} corresponding to the given SQL statement * @throws SqlParseException if there is an error while parsing the SQL statement */ - public static RelRoot convertQuery(String sqlStatement, Prepare.CatalogReader catalogReader) - throws SqlParseException { - SqlValidator validator = new SubstraitSqlValidator(catalogReader); - return convertQuery(sqlStatement, catalogReader, validator, createDefaultRelOptCluster()); + public static RelRoot convertRelationalQuery( + String sqlStatement, Prepare.CatalogReader catalogReader) throws SqlParseException { + return convertRelationalQuery( + sqlStatement, + catalogReader, + new SubstraitSqlValidator(catalogReader), + createDefaultParserConfig(), + createDefaultRelOptCluster()); } /** * Converts a SQL statement to a Calcite {@link RelRoot}. * + * @param sqlStatement a SQL statement string + * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in + * the SQL statement + * @param validator the {@link SqlValidator} used to validate the SQL statement. Allows for + * additional control of SQL functions and operators via {@link + * SqlValidator#getOperatorTable()} + * @param parserConfig the {@link SqlParser.Config} used for parsing the Sql statement + * @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement + * processing. Calcite expects that the {@link RelOptCluster} used during statement processing + * is the same as that used during query optimization. + * @return a {@link RelRoot} corresponding to the given SQL statement + * @throws SqlParseException if there is an error while parsing the SQL statement + */ + public static RelRoot convertRelationalQuery( + String sqlStatement, + Prepare.CatalogReader catalogReader, + SqlValidator validator, + SqlParser.Config parserConfig, + RelOptCluster cluster) + throws SqlParseException { + CalciteOperation calciteOperation = + convertQuery(sqlStatement, catalogReader, validator, parserConfig, cluster); + if (calciteOperation instanceof RelationalOperation) { + return ((RelationalOperation) calciteOperation).getRelRoot(); + } + throw new IllegalArgumentException( + String.format("Non-relational algebra statement: %s", sqlStatement)); + } + + /** + * Converts a SQL statement to a CalciteOperation {@link CalciteOperation}. + * + * @param sqlStatement a SQL statement + * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in + * the SQL statement + * @return {@link CalciteOperation} corresponding to the given SQL statement + * @throws SqlParseException if there is an error while parsing the SQL statement string + */ + public static CalciteOperation convertQuery( + String sqlStatement, Prepare.CatalogReader catalogReader) throws SqlParseException { + return convertQuery( + sqlStatement, + catalogReader, + new SubstraitSqlValidator(catalogReader), + createDefaultParserConfig(), + createDefaultRelOptCluster()); + } + + /** + * Converts a parsed sql statement (SqlNode) to a CalciteOperation {@link CalciteOperation}. + * + * @param sqlNode a parsed SQL statement + * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in + * the SQL statement + * @param validator the {@link SqlValidator} used to validate the SQL statement. Allows for + * additional control of SQL functions and operators via {@link + * SqlValidator#getOperatorTable()} + * @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement + * processing. Calcite expects that the {@link RelOptCluster} used during statement processing + * is the same as that used during query optimization. + * @return {@link CalciteOperation} corresponding to the given SQL statement + */ + public static CalciteOperation convertQuery( + SqlNode sqlNode, + Prepare.CatalogReader catalogReader, + SqlValidator validator, + RelOptCluster cluster) { + return convert(sqlNode, catalogReader, validator, cluster); + } + + /** + * Converts a SQL statement to a CalciteOperation {@link CalciteOperation}. + * * @param sqlStatement a SQL statement * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in * the SQL statement * @param validator the {@link SqlValidator} used to validate the SQL statement. Allows for * additional control of SQL functions and operators via {@link * SqlValidator#getOperatorTable()} + * @param parserConfig the {@link SqlParser.Config} used for parsing the Sql statement * @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement * processing. Calcite expects that the {@link RelOptCluster} used during statement processing * is the same as that used during query optimization. - * @return {@link RelRoot} corresponding to the given SQL statement + * @return {@link CalciteOperation} corresponding to the given SQL statement * @throws SqlParseException if there is an error while parsing the SQL statement string */ - public static RelRoot convertQuery( + public static CalciteOperation convertQuery( String sqlStatement, Prepare.CatalogReader catalogReader, SqlValidator validator, + SqlParser.Config parserConfig, RelOptCluster cluster) throws SqlParseException { - List sqlNodes = SubstraitSqlStatementParser.parseStatements(sqlStatement); + List sqlNodes = + SubstraitSqlStatementParser.parseStatements(sqlStatement, parserConfig); if (sqlNodes.size() != 1) { throw new IllegalArgumentException( - String.format("Expected one statement, found: %d", sqlNodes.size())); + String.format("Expected one statement, found %d", sqlNodes.size())); } - List relRoots = convert(sqlNodes, catalogReader, validator, cluster); - // as there was only 1 statement, there should only be 1 root - return relRoots.get(0); + return convertQuery(sqlNodes.get(0), catalogReader, validator, cluster); } /** @@ -82,10 +223,15 @@ public static RelRoot convertQuery( * @return a list of {@link RelRoot}s corresponding to the given SQL statements * @throws SqlParseException if there is an error while parsing the SQL statements */ - public static List convertQueries( + public static List convertRelationalQueries( String sqlStatements, Prepare.CatalogReader catalogReader) throws SqlParseException { SqlValidator validator = new SubstraitSqlValidator(catalogReader); - return convertQueries(sqlStatements, catalogReader, validator, createDefaultRelOptCluster()); + return convertRelationalQueries( + sqlStatements, + catalogReader, + validator, + createDefaultParserConfig(), + createDefaultRelOptCluster()); } /** @@ -98,66 +244,48 @@ public static List convertQueries( * @param validator the {@link SqlValidator} used to validate SQL statements. Allows for * additional control of SQL functions and operators via {@link * SqlValidator#getOperatorTable()} + * @param parserConfig the {@link SqlParser.Config} used for parsing the Sql statement * * @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement * processing. Calcite expects that the {@link RelOptCluster} used during statement processing * is the same as that used during query optimization. * @return a list of {@link RelRoot}s corresponding to the given SQL statements * @throws SqlParseException if there is an error while parsing the SQL statements */ - public static List convertQueries( + public static List convertRelationalQueries( String sqlStatements, Prepare.CatalogReader catalogReader, SqlValidator validator, + SqlParser.Config parserConfig, RelOptCluster cluster) throws SqlParseException { - List sqlNodes = SubstraitSqlStatementParser.parseStatements(sqlStatements); - return convert(sqlNodes, catalogReader, validator, cluster); - } - - static List convert( - List sqlNodes, - Prepare.CatalogReader catalogReader, - SqlValidator validator, - RelOptCluster cluster) { - RelOptTable.ViewExpander viewExpander = null; - SqlToRelConverter converter = - new SqlToRelConverter( - viewExpander, - validator, - catalogReader, - cluster, - StandardConvertletTable.INSTANCE, - SqlToRelConverter.CONFIG); - // apply validation - boolean needsValidation = true; - // query is the root of the tree - boolean top = true; + List sqlNodes = + SubstraitSqlStatementParser.parseStatements(sqlStatements, parserConfig); return sqlNodes.stream() - .map( - sqlNode -> - removeRedundantProjects(converter.convertQuery(sqlNode, needsValidation, top))) - .collect(Collectors.toList()); - } - - static RelOptCluster createDefaultRelOptCluster() { - RexBuilder rexBuilder = - new RexBuilder(new JavaTypeFactoryImpl(SubstraitTypeSystem.TYPE_SYSTEM)); - HepProgram program = HepProgram.builder().build(); - RelOptPlanner emptyPlanner = new HepPlanner(program); - return RelOptCluster.create(emptyPlanner, rexBuilder); + .map(sqlNode -> convert(sqlNode, catalogReader, validator, cluster)) + .filter(calciteOperation -> calciteOperation instanceof RelationalOperation) + .map(calciteOperation -> ((RelationalOperation) calciteOperation).getRelRoot()) + .collect(Collectors.toUnmodifiableList()); } - static RelRoot removeRedundantProjects(RelRoot root) { - return root.withRel(removeRedundantProjects(root.rel)); - } + /** + * Converts one or more SQL statements to a List of {@link CalciteOperation}, with one {@link + * CalciteOperation} per statement. + * + * @param sqlStatements a string containing one or more SQL statements + * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in + * the SQL statements + * @return a list of {@link CalciteOperation}s corresponding to the given SQL statements + * @throws SqlParseException if there is an error while parsing the SQL statements + */ + public static List convertQueries( + String sqlStatements, Prepare.CatalogReader catalogReader) throws SqlParseException { + List sqlNodes = + SubstraitSqlStatementParser.parseStatements(sqlStatements, createDefaultParserConfig()); + SqlValidator validator = new SubstraitSqlValidator(catalogReader); + RelOptCluster cluster = createDefaultRelOptCluster(); - static RelNode removeRedundantProjects(RelNode root) { - // The Calcite RelBuilder, when constructing Project that does not modify its inputs in any way, - // simply elides it. The PROJECT_REMOVE rule can be used to remove such projects from Rel trees. - // This facilitates roundtrip testing. - HepProgram program = HepProgram.builder().addRuleInstance(CoreRules.PROJECT_REMOVE).build(); - HepPlanner planner = new HepPlanner(program); - planner.setRoot(root); - return planner.findBestExp(); + return sqlNodes.stream() + .map(sqlNode -> convert(sqlNode, catalogReader, validator, cluster)) + .collect(Collectors.toUnmodifiableList()); } } diff --git a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java index 932f657b8..e817e9c9c 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java @@ -66,7 +66,7 @@ void testPlanRoundTrip(String sqlStatement) throws Exception { assertPlanRoundtrip(plan); assertCalciteOperationSubstraitRelRoundTrip(sqlStatement, catalogReader); - assertFullRoundTrip(sqlStatement, catalogReader); + // assertFullRoundTrip(sqlStatement, catalogReader); } @Test diff --git a/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java index d462ea05d..975acb86d 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java @@ -26,7 +26,7 @@ void preserveNamesFromSql() throws Exception { List expectedNames = List.of("a", "B"); org.apache.calcite.rel.RelRoot calciteRelRoot1 = - SubstraitSqlToCalcite.convertQuery(query, catalogReader); + SubstraitSqlToCalcite.convertRelationalQuery(query, catalogReader); assertEquals(expectedNames, calciteRelRoot1.validatedRowType.getFieldNames()); io.substrait.plan.Plan.Root substraitRelRoot = diff --git a/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java b/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java index fd9572b2e..305c051ac 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java @@ -23,7 +23,7 @@ void conversionHandlesBuiltInSum0CallAddedByRule() throws SqlParseException, IOE // verify that the query works generally assertFullRoundTrip(query); - RelRoot relRoot = SubstraitSqlToCalcite.convertQuery(query, TPCH_CATALOG); + RelRoot relRoot = SubstraitSqlToCalcite.convertRelationalQuery(query, TPCH_CATALOG); RelNode originalPlan = relRoot.rel; // Create a program to apply the AGGREGATE_EXPAND_DISTINCT_AGGREGATES_TO_JOIN rule. diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index 56173f1ea..265546e27 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -178,7 +178,7 @@ protected void assertFullRoundTrip(String sqlQuery, Prepare.CatalogReader catalo ExtensionCollector extensionCollector = new ExtensionCollector(); // SQL -> Calcite 1 - RelRoot calcite1 = SubstraitSqlToCalcite.convertQuery(sqlQuery, catalogReader); + RelRoot calcite1 = SubstraitSqlToCalcite.convertRelationalQuery(sqlQuery, catalogReader); // Calcite 1 -> Substrait POJO 1 Plan.Root root1 = SubstraitRelVisitor.convert(calcite1, extensions); From 4be421bc5c6bfa935e13e5cb56c3eb4b6237882f Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Tue, 8 Jul 2025 10:25:12 +0200 Subject: [PATCH 05/17] feat(istmus): add ddl support to sql->substrait # Conflicts: # isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java # Conflicts: # isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java # Conflicts: # isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java # isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java --- .../substrait/isthmus/DdlRoundtripTest.java | 40 ++----------------- 1 file changed, 3 insertions(+), 37 deletions(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java index e817e9c9c..579fc938d 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java @@ -2,12 +2,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals; -import io.substrait.isthmus.operation.CalciteOperation; import io.substrait.isthmus.sql.SubstraitCreateStatementParser; import io.substrait.plan.Plan; import io.substrait.plan.PlanProtoConverter; import io.substrait.plan.ProtoPlanConverter; -import io.substrait.relation.Rel; import org.apache.calcite.prepare.Prepare; import org.apache.calcite.sql.parser.SqlParseException; import org.junit.jupiter.api.Test; @@ -30,47 +28,15 @@ void testSqlToSubstrait(String sqlStatement) throws SqlParseException { assertEquals(protoPlan, protoPlan1); } - protected void assertCalciteOperationSubstraitRelRoundTrip( - String query, Prepare.CatalogReader catalogReader) throws Exception { - // sql <--> substrait round trip test. - // Assert (sql -> calcite -> substrait) and (sql -> substrait -> calcite -> substrait) are same. - // Return list of sql -> Substrait rel -> Calcite rel. - SqlToSubstrait s = new SqlToSubstrait(); - - // 1. SQL -> Substrait Plan - Plan plan1 = s.convert(query, catalogReader); - - // 2. Substrait Plan -> Substrait Rel - Plan.Root pojo1 = plan1.getRoots().get(0); - - // 3. Substrait Rel -> CalciteOperation - - SubstraitToCalciteOperation substraitToCalciteOperation = - new SubstraitToCalciteOperation(extensions, typeFactory, catalogReader, s.parserConfig); - Rel rel = plan1.getRoots().get(0).getInput(); - CalciteOperation calciteOperation = - rel.accept(substraitToCalciteOperation, SubstraitRelNodeConverter.Context.newContext()); - - // 4. CalciteOperation -> Substrait Rel - CalciteOperationToSubstrait calciteOperationToSubstrait = - new CalciteOperationToSubstrait(extensions, s.featureBoard); - Plan.Root pojo2 = calciteOperation.accept(calciteOperationToSubstrait); - - assertEquals(pojo1, pojo2); - } - - void testPlanRoundTrip(String sqlStatement) throws Exception { + void testPlanRoundTrip(String sqlStatement) throws SqlParseException { SqlToSubstrait sql2subst = new SqlToSubstrait(); final Plan plan = sql2subst.convert(sqlStatement, catalogReader); assertPlanRoundtrip(plan); - assertCalciteOperationSubstraitRelRoundTrip(sqlStatement, catalogReader); - - // assertFullRoundTrip(sqlStatement, catalogReader); } @Test - void testCreateTable() throws Exception { + void testCreateTable() throws SqlParseException { String sql = "create table dst1 as select * from src1"; testSqlToSubstrait(sql); // TBD: full roundtrip is not possible because there is no relational algebra for DDL @@ -78,7 +44,7 @@ void testCreateTable() throws Exception { } @Test - void testCreateView() throws Exception { + void testCreateView() throws SqlParseException { String sql = "create view dst1 as select * from src1"; testSqlToSubstrait(sql); testPlanRoundTrip(sql); From e1a524b27b842a2eba6581c4c1acda2e8e821a0c Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Wed, 20 Aug 2025 15:40:04 +0200 Subject: [PATCH 06/17] refactor(isthmus): introduce CacliteOperation to represent rel algebra and ddl --- .../substrait/isthmus/DdlRoundtripTest.java | 40 +++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java index 579fc938d..932f657b8 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java @@ -2,10 +2,12 @@ import static org.junit.jupiter.api.Assertions.assertEquals; +import io.substrait.isthmus.operation.CalciteOperation; import io.substrait.isthmus.sql.SubstraitCreateStatementParser; import io.substrait.plan.Plan; import io.substrait.plan.PlanProtoConverter; import io.substrait.plan.ProtoPlanConverter; +import io.substrait.relation.Rel; import org.apache.calcite.prepare.Prepare; import org.apache.calcite.sql.parser.SqlParseException; import org.junit.jupiter.api.Test; @@ -28,15 +30,47 @@ void testSqlToSubstrait(String sqlStatement) throws SqlParseException { assertEquals(protoPlan, protoPlan1); } - void testPlanRoundTrip(String sqlStatement) throws SqlParseException { + protected void assertCalciteOperationSubstraitRelRoundTrip( + String query, Prepare.CatalogReader catalogReader) throws Exception { + // sql <--> substrait round trip test. + // Assert (sql -> calcite -> substrait) and (sql -> substrait -> calcite -> substrait) are same. + // Return list of sql -> Substrait rel -> Calcite rel. + SqlToSubstrait s = new SqlToSubstrait(); + + // 1. SQL -> Substrait Plan + Plan plan1 = s.convert(query, catalogReader); + + // 2. Substrait Plan -> Substrait Rel + Plan.Root pojo1 = plan1.getRoots().get(0); + + // 3. Substrait Rel -> CalciteOperation + + SubstraitToCalciteOperation substraitToCalciteOperation = + new SubstraitToCalciteOperation(extensions, typeFactory, catalogReader, s.parserConfig); + Rel rel = plan1.getRoots().get(0).getInput(); + CalciteOperation calciteOperation = + rel.accept(substraitToCalciteOperation, SubstraitRelNodeConverter.Context.newContext()); + + // 4. CalciteOperation -> Substrait Rel + CalciteOperationToSubstrait calciteOperationToSubstrait = + new CalciteOperationToSubstrait(extensions, s.featureBoard); + Plan.Root pojo2 = calciteOperation.accept(calciteOperationToSubstrait); + + assertEquals(pojo1, pojo2); + } + + void testPlanRoundTrip(String sqlStatement) throws Exception { SqlToSubstrait sql2subst = new SqlToSubstrait(); final Plan plan = sql2subst.convert(sqlStatement, catalogReader); assertPlanRoundtrip(plan); + assertCalciteOperationSubstraitRelRoundTrip(sqlStatement, catalogReader); + + assertFullRoundTrip(sqlStatement, catalogReader); } @Test - void testCreateTable() throws SqlParseException { + void testCreateTable() throws Exception { String sql = "create table dst1 as select * from src1"; testSqlToSubstrait(sql); // TBD: full roundtrip is not possible because there is no relational algebra for DDL @@ -44,7 +78,7 @@ void testCreateTable() throws SqlParseException { } @Test - void testCreateView() throws SqlParseException { + void testCreateView() throws Exception { String sql = "create view dst1 as select * from src1"; testSqlToSubstrait(sql); testPlanRoundTrip(sql); From 5ce1cb501dd2343b8178cd6850e09eb0f43ecd21 Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Thu, 21 Aug 2025 17:24:23 +0200 Subject: [PATCH 07/17] chore(isthmus): rebase + refactoring --- .../src/test/java/io/substrait/isthmus/DdlRoundtripTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java index 932f657b8..e817e9c9c 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java @@ -66,7 +66,7 @@ void testPlanRoundTrip(String sqlStatement) throws Exception { assertPlanRoundtrip(plan); assertCalciteOperationSubstraitRelRoundTrip(sqlStatement, catalogReader); - assertFullRoundTrip(sqlStatement, catalogReader); + // assertFullRoundTrip(sqlStatement, catalogReader); } @Test From f9a377d407ef90cf03a9a0fb4ef49fd38ce09087 Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Wed, 10 Sep 2025 11:55:07 +0200 Subject: [PATCH 08/17] fix(isthmus): implement ddl operatoins sas relnode --- .../isthmus/CalciteOperationToSubstrait.java | 79 ------ .../{operation => }/SqlKindFromRel.java | 28 +- .../io/substrait/isthmus/SqlToSubstrait.java | 12 +- .../isthmus/SubstraitRelNodeConverter.java | 25 ++ .../isthmus/SubstraitRelVisitor.java | 47 ++++ .../substrait/isthmus/SubstraitToCalcite.java | 5 +- .../isthmus/SubstraitToCalciteOperation.java | 77 ------ .../isthmus/calcite/rel/CreateTable.java | 41 +++ .../isthmus/calcite/rel/CreateView.java | 39 +++ .../calcite/rel/DdlSqlToRelConverter.java | 64 +++++ .../isthmus/expression/DdlRelBuilder.java | 123 --------- .../isthmus/operation/CalciteOperation.java | 7 - .../CalciteOperationBasicVisitor.java | 38 --- .../operation/CalciteOperationBuilder.java | 69 ----- .../operation/CalciteOperationVisitor.java | 17 -- .../substrait/isthmus/operation/Create.java | 33 --- .../isthmus/operation/CreateTableAs.java | 15 - .../isthmus/operation/CreateView.java | 15 - .../isthmus/operation/CreateWithInput.java | 27 -- .../isthmus/operation/DdlOperation.java | 28 -- .../operation/RelationalOperation.java | 20 -- .../sql/SubstraitSqlStatementParser.java | 18 +- .../isthmus/sql/SubstraitSqlToCalcite.java | 256 +++++------------- .../substrait/isthmus/DdlRoundtripTest.java | 61 +---- .../substrait/isthmus/DmlRoundtripTest.java | 10 +- .../substrait/isthmus/NameRoundtripTest.java | 3 +- .../isthmus/OptimizerIntegrationTest.java | 2 +- .../io/substrait/isthmus/PlanTestBase.java | 68 ++++- 28 files changed, 373 insertions(+), 854 deletions(-) delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/CalciteOperationToSubstrait.java rename isthmus/src/main/java/io/substrait/isthmus/{operation => }/SqlKindFromRel.java (83%) delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalciteOperation.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/calcite/rel/DdlSqlToRelConverter.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/expression/DdlRelBuilder.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperation.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBasicVisitor.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBuilder.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationVisitor.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/Create.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CreateTableAs.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CreateView.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/CreateWithInput.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/DdlOperation.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/operation/RelationalOperation.java diff --git a/isthmus/src/main/java/io/substrait/isthmus/CalciteOperationToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/CalciteOperationToSubstrait.java deleted file mode 100644 index d21f20ff9..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/CalciteOperationToSubstrait.java +++ /dev/null @@ -1,79 +0,0 @@ -package io.substrait.isthmus; - -import io.substrait.expression.Expression; -import io.substrait.expression.ExpressionCreator; -import io.substrait.extension.SimpleExtension; -import io.substrait.isthmus.operation.CalciteOperationBasicVisitor; -import io.substrait.isthmus.operation.CreateTableAs; -import io.substrait.isthmus.operation.CreateView; -import io.substrait.isthmus.operation.RelationalOperation; -import io.substrait.plan.Plan; -import io.substrait.relation.AbstractDdlRel; -import io.substrait.relation.AbstractWriteRel; -import io.substrait.relation.NamedDdl; -import io.substrait.relation.NamedWrite; -import io.substrait.type.NamedStruct; -import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.rel.type.RelDataType; - -public class CalciteOperationToSubstrait extends CalciteOperationBasicVisitor { - private final SimpleExtension.ExtensionCollection extensionCollection; - private final FeatureBoard featureBoard; - - public CalciteOperationToSubstrait( - SimpleExtension.ExtensionCollection extensionCollection, FeatureBoard featureBoard) { - this.extensionCollection = extensionCollection; - this.featureBoard = featureBoard; - } - - private NamedStruct getSchema(final RelRoot queryRelRoot) { - final RelDataType rowType = queryRelRoot.rel.getRowType(); - - final TypeConverter typeConverter = TypeConverter.DEFAULT; - return typeConverter.toNamedStruct(rowType); - } - - @Override - public Plan.Root visit(RelationalOperation relationalOperation) { - return SubstraitRelVisitor.convert( - relationalOperation.getRelRoot(), extensionCollection, featureBoard); - } - - @Override - public Plan.Root visit(CreateTableAs createTableAs) { - RelRoot input = createTableAs.getInput(); - Plan.Root rel = SubstraitRelVisitor.convert(input, extensionCollection, featureBoard); - NamedStruct schema = getSchema(input); - - NamedWrite namedWrite = - NamedWrite.builder() - .input(rel.getInput()) - .tableSchema(schema) - .operation(AbstractWriteRel.WriteOp.CTAS) - .createMode(AbstractWriteRel.CreateMode.REPLACE_IF_EXISTS) - .outputMode(AbstractWriteRel.OutputMode.NO_OUTPUT) - .names(createTableAs.getNames()) - .build(); - - return Plan.Root.builder().input(namedWrite).build(); - } - - @Override - public Plan.Root visit(CreateView createView) { - RelRoot input = createView.getInput(); - Plan.Root rel = SubstraitRelVisitor.convert(input, extensionCollection, featureBoard); - final Expression.StructLiteral defaults = ExpressionCreator.struct(false); - - final NamedDdl namedDdl = - NamedDdl.builder() - .viewDefinition(rel.getInput()) - .tableSchema(getSchema(input)) - .tableDefaults(defaults) - .operation(AbstractDdlRel.DdlOp.CREATE) - .object(AbstractDdlRel.DdlObject.VIEW) - .names(createView.getNames()) - .build(); - - return Plan.Root.builder().input(namedDdl).build(); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/SqlKindFromRel.java b/isthmus/src/main/java/io/substrait/isthmus/SqlKindFromRel.java similarity index 83% rename from isthmus/src/main/java/io/substrait/isthmus/operation/SqlKindFromRel.java rename to isthmus/src/main/java/io/substrait/isthmus/SqlKindFromRel.java index 4eb5b69f8..0a1f475c7 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/operation/SqlKindFromRel.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlKindFromRel.java @@ -1,4 +1,4 @@ -package io.substrait.isthmus.operation; +package io.substrait.isthmus; import io.substrait.relation.Aggregate; import io.substrait.relation.ConsistentPartitionWindow; @@ -43,7 +43,7 @@ public class SqlKindFromRel @Override public SqlKind visit(Aggregate aggregate, EmptyVisitationContext context) throws RuntimeException { - // Aggregation is a core part of a query (DQL). + return QUERY_KIND; } @@ -56,25 +56,21 @@ public SqlKind visit(EmptyScan emptyScan, EmptyVisitationContext context) @Override public SqlKind visit(Fetch fetch, EmptyVisitationContext context) throws RuntimeException { - // Fetch corresponds to LIMIT/OFFSET, a part of a query. return QUERY_KIND; } @Override public SqlKind visit(Filter filter, EmptyVisitationContext context) throws RuntimeException { - // Filter corresponds to a WHERE clause, a part of a query. return QUERY_KIND; } @Override public SqlKind visit(Join join, EmptyVisitationContext context) throws RuntimeException { - // A logical join operation. return SqlKind.JOIN; } @Override public SqlKind visit(Set set, EmptyVisitationContext context) throws RuntimeException { - // Maps Substrait's Set operations to their SQL equivalents. switch (set.getSetOp()) { case UNION_ALL: case UNION_DISTINCT: @@ -96,38 +92,32 @@ public SqlKind visit(Set set, EmptyVisitationContext context) throws RuntimeExce @Override public SqlKind visit(NamedScan namedScan, EmptyVisitationContext context) throws RuntimeException { - // A scan from a named table is the start of a query. return QUERY_KIND; } @Override public SqlKind visit(LocalFiles localFiles, EmptyVisitationContext context) throws RuntimeException { - // A scan from local files is a type of query input. return QUERY_KIND; } @Override public SqlKind visit(Project project, EmptyVisitationContext context) throws RuntimeException { - // Project corresponds to the SELECT clause of a query. return QUERY_KIND; } @Override public SqlKind visit(Expand expand, EmptyVisitationContext context) throws RuntimeException { - // Expand is a relational operator used in queries (e.g., for grouping sets). return QUERY_KIND; } @Override public SqlKind visit(Sort sort, EmptyVisitationContext context) throws RuntimeException { - // A sort operation directly maps to ORDER BY. return SqlKind.ORDER_BY; } @Override public SqlKind visit(Cross cross, EmptyVisitationContext context) throws RuntimeException { - // A cross join is a type of join. return SqlKind.JOIN; } @@ -141,48 +131,41 @@ public SqlKind visit(VirtualTableScan virtualTableScan, EmptyVisitationContext c @Override public SqlKind visit(ExtensionLeaf extensionLeaf, EmptyVisitationContext context) throws RuntimeException { - // Unknown extension node. return SqlKind.OTHER; } @Override public SqlKind visit(ExtensionSingle extensionSingle, EmptyVisitationContext context) throws RuntimeException { - // Unknown extension node. return SqlKind.OTHER; } @Override public SqlKind visit(ExtensionMulti extensionMulti, EmptyVisitationContext context) throws RuntimeException { - // Unknown extension node. return SqlKind.OTHER; } @Override public SqlKind visit(ExtensionTable extensionTable, EmptyVisitationContext context) throws RuntimeException { - // Unknown extension node. return SqlKind.OTHER; } @Override public SqlKind visit(HashJoin hashJoin, EmptyVisitationContext context) throws RuntimeException { - // A physical hash join is a type of join. return SqlKind.JOIN; } @Override public SqlKind visit(MergeJoin mergeJoin, EmptyVisitationContext context) throws RuntimeException { - // A physical merge join is a type of join. return SqlKind.JOIN; } @Override public SqlKind visit(NestedLoopJoin nestedLoopJoin, EmptyVisitationContext context) throws RuntimeException { - // A physical nested loop join is a type of join. return SqlKind.JOIN; } @@ -190,13 +173,11 @@ public SqlKind visit(NestedLoopJoin nestedLoopJoin, EmptyVisitationContext conte public SqlKind visit( ConsistentPartitionWindow consistentPartitionWindow, EmptyVisitationContext context) throws RuntimeException { - // This relation represents a window function operation. return SqlKind.OVER; } @Override public SqlKind visit(NamedWrite write, EmptyVisitationContext context) throws RuntimeException { - // DML and CTAS write operations. switch (write.getOperation()) { case INSERT: return SqlKind.INSERT; @@ -214,13 +195,11 @@ public SqlKind visit(NamedWrite write, EmptyVisitationContext context) throws Ru @Override public SqlKind visit(ExtensionWrite write, EmptyVisitationContext context) throws RuntimeException { - // Custom DML/DDL operations are best mapped to OTHER or OTHER_DDL. return SqlKind.OTHER_DDL; } @Override public SqlKind visit(NamedDdl ddl, EmptyVisitationContext context) throws RuntimeException { - // DDL operations, determined by combining the operation and the object type. switch (ddl.getOperation()) { case CREATE: case CREATE_OR_REPLACE: @@ -251,14 +230,11 @@ public SqlKind visit(NamedDdl ddl, EmptyVisitationContext context) throws Runtim @Override public SqlKind visit(ExtensionDdl ddl, EmptyVisitationContext context) throws RuntimeException { - // A custom DDL operation. return SqlKind.OTHER_DDL; } @Override public SqlKind visit(NamedUpdate update, EmptyVisitationContext context) throws RuntimeException { - // An update operation is a specific DML type. Note that modern Substrait - // producers should use NamedWrite with WriteOp.UPDATE instead. return SqlKind.UPDATE; } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index 1d1398a1a..3e19ca58c 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -1,12 +1,10 @@ package io.substrait.isthmus; -import io.substrait.isthmus.operation.CalciteOperation; import io.substrait.isthmus.sql.SubstraitSqlToCalcite; import io.substrait.plan.ImmutablePlan.Builder; import io.substrait.plan.Plan; import io.substrait.plan.Plan.Version; import io.substrait.plan.PlanProtoConverter; -import java.util.List; import org.apache.calcite.prepare.Prepare; import org.apache.calcite.sql.parser.SqlParseException; @@ -55,13 +53,9 @@ public Plan convert(String sqlStatements, Prepare.CatalogReader catalogReader) builder.version(Version.builder().from(Version.DEFAULT_VERSION).producer("isthmus").build()); // TODO: consider case in which one sql passes conversion while others don't - List calciteOperations = - SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader); - CalciteOperationToSubstrait calciteOperationToSubstrait = - new CalciteOperationToSubstrait(EXTENSION_COLLECTION, featureBoard); - calciteOperations.stream() - .map(operation -> operation.accept(calciteOperationToSubstrait)) - .forEach(builder::addRoots); + SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader).stream() + .map(root -> SubstraitRelVisitor.convert(root, EXTENSION_COLLECTION, featureBoard)) + .forEach(root -> builder.addRoots(root)); return builder.build(); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java index d1ce8a13b..91e39a73d 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java @@ -10,6 +10,8 @@ import io.substrait.expression.Expression.SortDirection; import io.substrait.expression.FunctionArg; import io.substrait.extension.SimpleExtension; +import io.substrait.isthmus.calcite.rel.CreateTable; +import io.substrait.isthmus.calcite.rel.CreateView; import io.substrait.isthmus.expression.AggregateFunctionConverter; import io.substrait.isthmus.expression.ExpressionRexConverter; import io.substrait.isthmus.expression.ScalarFunctionConverter; @@ -24,6 +26,7 @@ import io.substrait.relation.Join; import io.substrait.relation.Join.JoinType; import io.substrait.relation.LocalFiles; +import io.substrait.relation.NamedDdl; import io.substrait.relation.NamedScan; import io.substrait.relation.NamedUpdate; import io.substrait.relation.NamedWrite; @@ -53,6 +56,7 @@ import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.JoinRelType; @@ -68,6 +72,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexSlot; import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.tools.Frameworks; @@ -547,6 +552,17 @@ public RelNode visit(NamedUpdate update, Context context) { false); } + @Override + public RelNode visit(NamedDdl namedDdl, Context context) { + if (namedDdl.getViewDefinition().isEmpty()) { + throw new IllegalArgumentException("no view definition found"); + } + Rel viewDefinition = namedDdl.getViewDefinition().get(); + RelNode relNode = viewDefinition.accept(this, context); + RelRoot relRoot = RelRoot.of(relNode, SqlKind.SELECT); + return new CreateView(namedDdl.getNames(), relRoot); + } + @Override public RelNode visit(VirtualTableScan virtualTableScan, Context context) { @@ -584,6 +600,13 @@ public RelNode visit(VirtualTableScan virtualTableScan, Context context) { relBuilder.getCluster(), rowTypeWithNames, ImmutableList.copyOf(tuples)); } + private RelNode handleCreateTableAs(NamedWrite namedWrite, Context context) { + Rel input = namedWrite.getInput(); + RelNode relNode = input.accept(this, context); + RelRoot relRoot = RelRoot.of(relNode, SqlKind.SELECT); + return new CreateTable(namedWrite.getNames(), relRoot); + } + @Override public RelNode visit(NamedWrite write, Context context) { RelNode input = write.getInput().accept(this, context); @@ -599,6 +622,8 @@ public RelNode visit(NamedWrite write, Context context) { case DELETE: operation = TableModify.Operation.DELETE; break; + case CTAS: + return handleCreateTableAs(write, context); default: throw new UnsupportedOperationException( "Write operation '" diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java index cc41e84a0..88de080e2 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java @@ -6,6 +6,8 @@ import io.substrait.expression.ExpressionCreator; import io.substrait.expression.FieldReference; import io.substrait.extension.SimpleExtension; +import io.substrait.isthmus.calcite.rel.CreateTable; +import io.substrait.isthmus.calcite.rel.CreateView; import io.substrait.isthmus.expression.AggregateFunctionConverter; import io.substrait.isthmus.expression.CallConverters; import io.substrait.isthmus.expression.LiteralConverter; @@ -13,6 +15,7 @@ import io.substrait.isthmus.expression.ScalarFunctionConverter; import io.substrait.isthmus.expression.WindowFunctionConverter; import io.substrait.plan.Plan; +import io.substrait.relation.AbstractDdlRel; import io.substrait.relation.AbstractWriteRel; import io.substrait.relation.Aggregate; import io.substrait.relation.Aggregate.Grouping; @@ -25,6 +28,7 @@ import io.substrait.relation.ImmutableMeasure.Builder; import io.substrait.relation.Join; import io.substrait.relation.Join.JoinType; +import io.substrait.relation.NamedDdl; import io.substrait.relation.NamedScan; import io.substrait.relation.NamedUpdate; import io.substrait.relation.NamedWrite; @@ -49,6 +53,7 @@ import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.TableModify; +import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexFieldAccess; @@ -445,8 +450,50 @@ public Rel visit(TableModify modify) { } } + private NamedStruct getSchema(final RelRoot queryRelRoot) { + final RelDataType rowType = queryRelRoot.rel.getRowType(); + return typeConverter.toNamedStruct(rowType); + } + + public Rel handleCreateTable(CreateTable createTable) { + RelRoot input = createTable.getInput(); + Rel inputRel = apply(input.project()); + NamedStruct schema = getSchema(input); + return NamedWrite.builder() + .input(inputRel) + .tableSchema(schema) + .operation(AbstractWriteRel.WriteOp.CTAS) + .createMode(AbstractWriteRel.CreateMode.REPLACE_IF_EXISTS) + .outputMode(AbstractWriteRel.OutputMode.NO_OUTPUT) + .names(createTable.getNames()) + .build(); + } + + public Rel handleCreateView(CreateView createView) { + RelRoot input = createView.getInput(); + Rel inputRel = apply(input.project()); + + final Expression.StructLiteral defaults = ExpressionCreator.struct(false); + + return NamedDdl.builder() + .viewDefinition(inputRel) + .tableSchema(getSchema(input)) + .tableDefaults(defaults) + .operation(AbstractDdlRel.DdlOp.CREATE) + .object(AbstractDdlRel.DdlObject.VIEW) + .names(createView.getNames()) + .build(); + } + @Override public Rel visitOther(RelNode other) { + if (other instanceof CreateTable) { + return handleCreateTable((CreateTable) other); + + } else if (other instanceof CreateView) { + return handleCreateView((CreateView) other); + } + throw new UnsupportedOperationException("Unable to handle node: " + other); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java index f0d3e2137..8dcfbf9e0 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalcite.java @@ -158,10 +158,11 @@ public RelRoot convert(Plan.Root root) { } return RelRoot.of(tableModify, tableRowType, kind); } - + SqlKindFromRel sqlKindFromRel = new SqlKindFromRel(); + SqlKind kind = root.getInput().accept(sqlKindFromRel, EmptyVisitationContext.INSTANCE); RelDataType inputRowType = convertedNode.getRowType(); RelDataType newRowType = renameFields(inputRowType, root.getNames(), 0).right; - return RelRoot.of(convertedNode, newRowType, SqlKind.SELECT); + return RelRoot.of(convertedNode, newRowType, kind); } /** diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalciteOperation.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalciteOperation.java deleted file mode 100644 index 6399f54ed..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToCalciteOperation.java +++ /dev/null @@ -1,77 +0,0 @@ -package io.substrait.isthmus; - -import io.substrait.extension.SimpleExtension; -import io.substrait.isthmus.operation.CalciteOperation; -import io.substrait.isthmus.operation.CreateTableAs; -import io.substrait.isthmus.operation.CreateView; -import io.substrait.isthmus.operation.RelationalOperation; -import io.substrait.isthmus.operation.SqlKindFromRel; -import io.substrait.relation.AbstractRelVisitor; -import io.substrait.relation.NamedDdl; -import io.substrait.relation.NamedWrite; -import io.substrait.relation.Rel; -import io.substrait.util.EmptyVisitationContext; -import java.util.List; -import org.apache.calcite.plan.RelTraitDef; -import org.apache.calcite.prepare.Prepare; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.parser.SqlParser; -import org.apache.calcite.tools.Frameworks; -import org.apache.calcite.tools.RelBuilder; - -public class SubstraitToCalciteOperation - extends AbstractRelVisitor< - CalciteOperation, SubstraitRelNodeConverter.Context, RuntimeException> { - - private final SubstraitRelNodeConverter substraitRelNodeConverter; - - public SubstraitToCalciteOperation( - SimpleExtension.ExtensionCollection extensionCollection, - RelDataTypeFactory relDataTypeFactory, - Prepare.CatalogReader catalogReader, - SqlParser.Config parserConfig) { - - RelBuilder relBuilder = - RelBuilder.create( - Frameworks.newConfigBuilder() - .parserConfig(parserConfig) - .defaultSchema(catalogReader.getRootSchema().plus()) - .traitDefs((List) null) - .programs() - .build()); - this.substraitRelNodeConverter = - new SubstraitRelNodeConverter(extensionCollection, relDataTypeFactory, relBuilder); - } - - @Override - public CalciteOperation visit(NamedDdl namedDdl, SubstraitRelNodeConverter.Context ctx) { - if (namedDdl.getViewDefinition().isEmpty()) { - throw new IllegalArgumentException("no view definition found"); - } - Rel viewDefinition = namedDdl.getViewDefinition().get(); - RelNode relNode = viewDefinition.accept(substraitRelNodeConverter, ctx); - RelRoot relRoot = RelRoot.of(relNode, SqlKind.CREATE_VIEW); - - return new CreateView(namedDdl.getNames(), relRoot); - } - - @Override - public CalciteOperation visit(NamedWrite namedWrite, SubstraitRelNodeConverter.Context ctx) { - Rel input = namedWrite.getInput(); - RelNode relNode = input.accept(substraitRelNodeConverter, ctx); - RelRoot relRoot = RelRoot.of(relNode, SqlKind.CREATE_TABLE); - return new CreateTableAs(namedWrite.getNames(), relRoot); - } - - @Override - public CalciteOperation visitFallback(Rel rel, SubstraitRelNodeConverter.Context context) { - SqlKindFromRel sqlKindFromRel = new SqlKindFromRel(); - SqlKind kind = rel.accept(sqlKindFromRel, EmptyVisitationContext.INSTANCE); - RelNode relNode = rel.accept(substraitRelNodeConverter, context); - RelRoot relRoot = RelRoot.of(relNode, kind); - return new RelationalOperation(relRoot); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java new file mode 100644 index 000000000..ff8bab06a --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java @@ -0,0 +1,41 @@ +package io.substrait.isthmus.calcite.rel; + +import java.util.List; +import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.type.RelDataType; + +public class CreateTable extends AbstractRelNode { + + private List names; + private RelRoot input; + + public RelRoot getInput() { + return input; + } + + public void setInput(RelRoot input) { + this.input = input; + } + + public CreateTable(List names, RelRoot input) { + super(input.rel.getCluster(), input.rel.getTraitSet()); + + this.names = names; + this.input = input; + } + + @Override + protected RelDataType deriveRowType() { + // return new DdlRelDataType(); + return input.validatedRowType; + } + + public List getNames() { + return names; + } + + public void setNames(List names) { + this.names = names; + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java new file mode 100644 index 000000000..819d65ba6 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java @@ -0,0 +1,39 @@ +package io.substrait.isthmus.calcite.rel; + +import java.util.List; +import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.type.RelDataType; + +public class CreateView extends AbstractRelNode { + private List names; + private RelRoot input; + + public CreateView(List names, RelRoot input) { + super(input.rel.getCluster(), input.rel.getTraitSet()); + this.names = names; + this.input = input; + } + + @Override + protected RelDataType deriveRowType() { + // return new DdlRelDataType(); + return input.validatedRowType; + } + + public List getNames() { + return names; + } + + public void setNames(List names) { + this.names = names; + } + + public RelRoot getInput() { + return input; + } + + public void setInput(RelRoot input) { + this.input = input; + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/DdlSqlToRelConverter.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/DdlSqlToRelConverter.java new file mode 100644 index 000000000..4a4702c58 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/DdlSqlToRelConverter.java @@ -0,0 +1,64 @@ +package io.substrait.isthmus.calcite.rel; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.ddl.SqlCreateTable; +import org.apache.calcite.sql.ddl.SqlCreateView; +import org.apache.calcite.sql.util.SqlBasicVisitor; +import org.apache.calcite.sql2rel.SqlToRelConverter; + +public class DdlSqlToRelConverter extends SqlBasicVisitor { + + protected final Map, Function> ddlHandlers = + new ConcurrentHashMap<>(); + private final SqlToRelConverter converter; + + private Function findDdlHandler(final SqlCall call) { + Class currentClass = call.getClass(); + while (SqlCall.class.isAssignableFrom(currentClass)) { + final Function found = ddlHandlers.get(currentClass); + if (found != null) { + return found; + } + currentClass = currentClass.getSuperclass(); + } + return null; + } + + public DdlSqlToRelConverter(SqlToRelConverter converter) { + this.converter = converter; + + ddlHandlers.put(SqlCreateTable.class, sqlCall -> handleCreateTable((SqlCreateTable) sqlCall)); + ddlHandlers.put(SqlCreateView.class, sqlCall -> handleCreateView((SqlCreateView) sqlCall)); + } + + @Override + public RelRoot visit(SqlCall sqlCall) { + Function ddlHandler = findDdlHandler(sqlCall); + if (ddlHandler != null) { + return ddlHandler.apply(sqlCall); + } + return handleNonDdl(sqlCall); + } + + protected RelRoot handleNonDdl(final SqlNode sqlNode) { + return converter.convertQuery(sqlNode, true, true); + } + + protected RelRoot handleCreateTable(final SqlCreateTable sqlCreateTable) { + if (sqlCreateTable.query == null) { + throw new IllegalArgumentException("Only create table as select statements are supported"); + } + final RelRoot input = converter.convertQuery(sqlCreateTable.query, true, true); + return RelRoot.of(new CreateTable(sqlCreateTable.name.names, input), sqlCreateTable.getKind()); + } + + protected RelRoot handleCreateView(final SqlCreateView sqlCreateView) { + final RelRoot input = converter.convertQuery(sqlCreateView.query, true, true); + return RelRoot.of(new CreateTable(sqlCreateView.name.names, input), sqlCreateView.getKind()); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/DdlRelBuilder.java b/isthmus/src/main/java/io/substrait/isthmus/expression/DdlRelBuilder.java deleted file mode 100644 index ddff9e173..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/DdlRelBuilder.java +++ /dev/null @@ -1,123 +0,0 @@ -package io.substrait.isthmus.expression; - -import io.substrait.expression.Expression; -import io.substrait.expression.ExpressionCreator; -import io.substrait.extension.SimpleExtension; -import io.substrait.isthmus.FeatureBoard; -import io.substrait.isthmus.SubstraitRelVisitor; -import io.substrait.isthmus.TypeConverter; -import io.substrait.plan.Plan; -import io.substrait.relation.AbstractDdlRel; -import io.substrait.relation.AbstractWriteRel; -import io.substrait.relation.NamedDdl; -import io.substrait.relation.NamedWrite; -import io.substrait.type.NamedStruct; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.function.BiFunction; -import java.util.function.Function; -import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.ddl.SqlCreateTable; -import org.apache.calcite.sql.ddl.SqlCreateView; -import org.apache.calcite.sql.util.SqlBasicVisitor; -import org.apache.calcite.sql2rel.SqlToRelConverter; - -public class DdlRelBuilder extends SqlBasicVisitor { - protected final Map, Function> createHandlers = - new ConcurrentHashMap<>(); - - private final SqlToRelConverter converter; - private final BiFunction bestExpRelRootGetter; - private final SimpleExtension.ExtensionCollection extensionCollection; - private final FeatureBoard featureBoard; - - public DdlRelBuilder( - final SqlToRelConverter converter, - final BiFunction bestExpRelRootGetter, - final SimpleExtension.ExtensionCollection extensionCollection, - final FeatureBoard featureBoard) { - super(); - this.converter = converter; - this.bestExpRelRootGetter = bestExpRelRootGetter; - this.extensionCollection = extensionCollection; - this.featureBoard = featureBoard; - - createHandlers.put( - SqlCreateTable.class, sqlCall -> handleCreateTable((SqlCreateTable) sqlCall)); - createHandlers.put(SqlCreateView.class, sqlCall -> handleCreateView((SqlCreateView) sqlCall)); - } - - private Function findCreateHandler(final SqlCall call) { - Class currentClass = call.getClass(); - while (SqlCall.class.isAssignableFrom(currentClass)) { - final Function found = createHandlers.get(currentClass); - if (found != null) { - return found; - } - currentClass = currentClass.getSuperclass(); - } - return null; - } - - @Override - public Plan.Root visit(final SqlCall sqlCall) { - Function createHandler = findCreateHandler(sqlCall); - if (createHandler == null) { - return null; - } - - return createHandler.apply(sqlCall); - } - - private NamedStruct getSchema(final RelRoot queryRelRoot) { - final RelDataType rowType = queryRelRoot.rel.getRowType(); - - final TypeConverter typeConverter = TypeConverter.DEFAULT; - return typeConverter.toNamedStruct(rowType); - } - - protected Plan.Root handleCreateTable(final SqlCreateTable sqlCreateTable) { - if (sqlCreateTable.query == null) { - throw new IllegalArgumentException("Only create table as select statements are supported"); - } - - final RelRoot queryRelRoot = bestExpRelRootGetter.apply(converter, sqlCreateTable.query); - - NamedStruct schema = getSchema(queryRelRoot); - - Plan.Root rel = SubstraitRelVisitor.convert(queryRelRoot, extensionCollection, featureBoard); - NamedWrite namedWrite = - NamedWrite.builder() - .input(rel.getInput()) - .tableSchema(schema) - .operation(AbstractWriteRel.WriteOp.CTAS) - .createMode(AbstractWriteRel.CreateMode.REPLACE_IF_EXISTS) - .outputMode(AbstractWriteRel.OutputMode.NO_OUTPUT) - .names(sqlCreateTable.name.names) - .build(); - - return Plan.Root.builder().input(namedWrite).build(); - } - - protected Plan.Root handleCreateView(final SqlCreateView sqlCreateView) { - - final RelRoot queryRelRoot = bestExpRelRootGetter.apply(converter, sqlCreateView.query); - Plan.Root rel = SubstraitRelVisitor.convert(queryRelRoot, extensionCollection, featureBoard); - final Expression.StructLiteral defaults = ExpressionCreator.struct(false); - - final NamedDdl namedDdl = - NamedDdl.builder() - .viewDefinition(rel.getInput()) - .tableSchema(getSchema(queryRelRoot)) - .tableDefaults(defaults) - .operation(AbstractDdlRel.DdlOp.CREATE) - .object(AbstractDdlRel.DdlObject.VIEW) - .names(sqlCreateView.name.names) - .build(); - - return Plan.Root.builder().input(namedDdl).build(); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperation.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperation.java deleted file mode 100644 index 96f43c841..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperation.java +++ /dev/null @@ -1,7 +0,0 @@ -package io.substrait.isthmus.operation; - -public class CalciteOperation { - public R accept(CalciteOperationVisitor visitor) { - return visitor.visit(this); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBasicVisitor.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBasicVisitor.java deleted file mode 100644 index dee4c9fdb..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBasicVisitor.java +++ /dev/null @@ -1,38 +0,0 @@ -package io.substrait.isthmus.operation; - -public class CalciteOperationBasicVisitor implements CalciteOperationVisitor { - @Override - public R visit(CalciteOperation operation) { - return null; - } - - @Override - public R visit(RelationalOperation relationalOperation) { - return visit((CalciteOperation) relationalOperation); - } - - @Override - public R visit(DdlOperation ddlOperation) { - return visit((CalciteOperation) ddlOperation); - } - - @Override - public R visit(CreateWithInput createWithInput) { - return visit((Create) createWithInput); - } - - @Override - public R visit(Create create) { - return visit((DdlOperation) create); - } - - @Override - public R visit(CreateTableAs createTableAs) { - return visit((CreateWithInput) createTableAs); - } - - @Override - public R visit(CreateView createView) { - return visit((CreateWithInput) createView); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBuilder.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBuilder.java deleted file mode 100644 index 74c0f22af..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationBuilder.java +++ /dev/null @@ -1,69 +0,0 @@ -package io.substrait.isthmus.operation; - -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.function.BiFunction; -import java.util.function.Function; -import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.ddl.SqlCreateTable; -import org.apache.calcite.sql.ddl.SqlCreateView; -import org.apache.calcite.sql.util.SqlBasicVisitor; -import org.apache.calcite.sql2rel.SqlToRelConverter; - -public class CalciteOperationBuilder extends SqlBasicVisitor { - protected final Map, Function> ddlHandlers = - new ConcurrentHashMap<>(); - private final SqlToRelConverter converter; - private final BiFunction bestExprRelRootBuilder; - - public CalciteOperationBuilder( - final SqlToRelConverter converter, - BiFunction bestExprRelRootBuilder) { - this.converter = converter; - this.bestExprRelRootBuilder = bestExprRelRootBuilder; - - ddlHandlers.put(SqlCreateTable.class, sqlCall -> handleCreateTable((SqlCreateTable) sqlCall)); - ddlHandlers.put(SqlCreateView.class, sqlCall -> handleCreateView((SqlCreateView) sqlCall)); - } - - private Function findDdlHandler(final SqlCall call) { - Class currentClass = call.getClass(); - while (SqlCall.class.isAssignableFrom(currentClass)) { - final Function found = ddlHandlers.get(currentClass); - if (found != null) { - return found; - } - currentClass = currentClass.getSuperclass(); - } - return null; - } - - @Override - public CalciteOperation visit(final SqlCall sqlCall) { - Function ddlHandler = findDdlHandler(sqlCall); - if (ddlHandler != null) { - return ddlHandler.apply(sqlCall); - } - return handleRelationalOperation(sqlCall); - } - - protected CalciteOperation handleRelationalOperation(final SqlNode sqlNode) { - return new RelationalOperation(bestExprRelRootBuilder.apply(converter, sqlNode)); - } - - protected CalciteOperation handleCreateTable(final SqlCreateTable sqlCreateTable) { - if (sqlCreateTable.query == null) { - throw new IllegalArgumentException("Only create table as select statements are supported"); - } - - final RelRoot queryRelRoot = bestExprRelRootBuilder.apply(converter, sqlCreateTable.query); - return new CreateTableAs(sqlCreateTable.name.names, queryRelRoot); - } - - protected CalciteOperation handleCreateView(final SqlCreateView sqlCreateView) { - final RelRoot queryRelRoot = bestExprRelRootBuilder.apply(converter, sqlCreateView.query); - return new CreateView(sqlCreateView.name.names, queryRelRoot); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationVisitor.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationVisitor.java deleted file mode 100644 index 6b69523b2..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/operation/CalciteOperationVisitor.java +++ /dev/null @@ -1,17 +0,0 @@ -package io.substrait.isthmus.operation; - -public interface CalciteOperationVisitor { - R visit(CalciteOperation operation); - - R visit(RelationalOperation relationalOperation); - - R visit(DdlOperation ddlOperation); - - R visit(CreateWithInput createWithInput); - - R visit(Create create); - - R visit(CreateTableAs createTableAs); - - R visit(CreateView createView); -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/Create.java b/isthmus/src/main/java/io/substrait/isthmus/operation/Create.java deleted file mode 100644 index dbae7a86b..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/operation/Create.java +++ /dev/null @@ -1,33 +0,0 @@ -package io.substrait.isthmus.operation; - -import java.util.List; - -public abstract class Create extends DdlOperation { - private final List names; - private final RelationType relationType; - - public Create(RelationType relationType, List names) { - this.relationType = relationType; - this.names = names; - } - - @Override - public List getNames() { - return names; - } - - @Override - public RelationType getRelationType() { - return relationType; - } - - @Override - public Operation getOperation() { - return Operation.CREATE; - } - - @Override - public R accept(CalciteOperationVisitor visitor) { - return visitor.visit(this); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CreateTableAs.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CreateTableAs.java deleted file mode 100644 index 15c05fcf0..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/operation/CreateTableAs.java +++ /dev/null @@ -1,15 +0,0 @@ -package io.substrait.isthmus.operation; - -import java.util.List; -import org.apache.calcite.rel.RelRoot; - -public class CreateTableAs extends CreateWithInput { - public CreateTableAs(List names, RelRoot input) { - super(RelationType.TABLE, names, input); - } - - @Override - public R accept(CalciteOperationVisitor visitor) { - return visitor.visit(this); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CreateView.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CreateView.java deleted file mode 100644 index d30998fa4..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/operation/CreateView.java +++ /dev/null @@ -1,15 +0,0 @@ -package io.substrait.isthmus.operation; - -import java.util.List; -import org.apache.calcite.rel.RelRoot; - -public class CreateView extends CreateWithInput { - public CreateView(List names, RelRoot input) { - super(RelationType.VIEW, names, input); - } - - @Override - public R accept(CalciteOperationVisitor visitor) { - return visitor.visit(this); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/CreateWithInput.java b/isthmus/src/main/java/io/substrait/isthmus/operation/CreateWithInput.java deleted file mode 100644 index 685f0c5f1..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/operation/CreateWithInput.java +++ /dev/null @@ -1,27 +0,0 @@ -package io.substrait.isthmus.operation; - -import java.util.List; -import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.rel.type.RelDataType; - -public abstract class CreateWithInput extends Create { - private final RelRoot input; - - public CreateWithInput(RelationType relationType, List names, RelRoot input) { - super(relationType, names); - this.input = input; - } - - public RelRoot getInput() { - return input; - } - - public RelDataType getRowType() { - return input.rel.getRowType(); - } - - @Override - public R accept(CalciteOperationVisitor visitor) { - return visitor.visit(this); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/DdlOperation.java b/isthmus/src/main/java/io/substrait/isthmus/operation/DdlOperation.java deleted file mode 100644 index b1419d814..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/operation/DdlOperation.java +++ /dev/null @@ -1,28 +0,0 @@ -package io.substrait.isthmus.operation; - -import java.util.List; - -public abstract class DdlOperation extends CalciteOperation { - public enum RelationType { - TABLE, - VIEW, - MATERIALIZED_VIEW - // TODO: , SCHEMA - } - - public enum Operation { - CREATE, - DROP - } - - public abstract List getNames(); - - public abstract RelationType getRelationType(); - - public abstract Operation getOperation(); - - @Override - public R accept(CalciteOperationVisitor visitor) { - return visitor.visit(this); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/operation/RelationalOperation.java b/isthmus/src/main/java/io/substrait/isthmus/operation/RelationalOperation.java deleted file mode 100644 index e89c08b89..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/operation/RelationalOperation.java +++ /dev/null @@ -1,20 +0,0 @@ -package io.substrait.isthmus.operation; - -import org.apache.calcite.rel.RelRoot; - -public class RelationalOperation extends CalciteOperation { - private final RelRoot relRoot; - - public RelationalOperation(RelRoot relRoot) { - this.relRoot = relRoot; - } - - public RelRoot getRelRoot() { - return relRoot; - } - - @Override - public R accept(CalciteOperationVisitor visitor) { - return visitor.visit(this); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java index a4d1de274..0f7891b5b 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java @@ -5,6 +5,7 @@ import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.parser.ddl.SqlDdlParserImpl; import org.apache.calcite.sql.validate.SqlConformanceEnum; /** @@ -18,7 +19,8 @@ public class SubstraitSqlStatementParser { // TODO: switch to Casing.UNCHANGED .withUnquotedCasing(Casing.TO_UPPER) // use LENIENT conformance to allow for parsing a wide variety of dialects - .withConformance(SqlConformanceEnum.LENIENT); + .withConformance(SqlConformanceEnum.LENIENT) + .withParserFactory(SqlDdlParserImpl.FACTORY); /** * Parse one or more SQL statements to a list of {@link SqlNode}s. @@ -31,18 +33,4 @@ public static List parseStatements(String sqlStatements) throws SqlPars SqlParser parser = SqlParser.create(sqlStatements, PARSER_CONFIG); return parser.parseStmtList(); } - - /** - * Parse one or more SQL statements to a list of {@link SqlNode}s. - * - * @param sqlStatements a string containing one or more SQL statements - * @param parserConfig sql parser configuration - * @return a list of {@link SqlNode}s corresponding to the given statements - * @throws SqlParseException if there is an error while parsing the SQL statements - */ - public static List parseStatements(String sqlStatements, SqlParser.Config parserConfig) - throws SqlParseException { - SqlParser parser = SqlParser.create(sqlStatements, parserConfig); - return parser.parseStmtList(); - } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java index a72f4ab9c..ed34c57e2 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java @@ -1,16 +1,13 @@ package io.substrait.isthmus.sql; -import io.substrait.isthmus.FeatureBoard; -import io.substrait.isthmus.ImmutableFeatureBoard; import io.substrait.isthmus.SubstraitTypeSystem; -import io.substrait.isthmus.operation.CalciteOperation; -import io.substrait.isthmus.operation.CalciteOperationBuilder; -import io.substrait.isthmus.operation.RelationalOperation; +import io.substrait.isthmus.calcite.rel.DdlSqlToRelConverter; import java.util.List; import java.util.stream.Collectors; import org.apache.calcite.jdbc.JavaTypeFactoryImpl; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgram; import org.apache.calcite.prepare.Prepare; @@ -20,9 +17,6 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.parser.SqlParseException; -import org.apache.calcite.sql.parser.SqlParser; -import org.apache.calcite.sql.parser.ddl.SqlDdlParserImpl; -import org.apache.calcite.sql.validate.SqlConformanceEnum; import org.apache.calcite.sql.validate.SqlValidator; import org.apache.calcite.sql2rel.SqlToRelConverter; import org.apache.calcite.sql2rel.StandardConvertletTable; @@ -33,62 +27,6 @@ */ public class SubstraitSqlToCalcite { - protected static SqlParser.Config createDefaultParserConfig() { - return SqlParser.Config.DEFAULT - .withUnquotedCasing(createDefaultFeatureBoard().unquotedCasing()) - .withParserFactory(SqlDdlParserImpl.FACTORY) - .withConformance(SqlConformanceEnum.LENIENT); - } - - protected static FeatureBoard createDefaultFeatureBoard() { - return ImmutableFeatureBoard.builder().build(); - } - - protected static SqlToRelConverter createSqlToRelConverter( - SqlValidator validator, Prepare.CatalogReader catalogReader, RelOptCluster relOptCluster) { - return new SqlToRelConverter( - null, - validator, - catalogReader, - relOptCluster, - StandardConvertletTable.INSTANCE, - SqlToRelConverter.CONFIG); - } - - protected static RelRoot parseRelationalExpression(SqlToRelConverter converter, SqlNode sqlNode) { - RelRoot converted = converter.convertQuery(sqlNode, true, true); - return converted.withRel(removeRedundantProjects(converted.rel)); - } - - protected static RelOptCluster createDefaultRelOptCluster() { - RexBuilder rexBuilder = - new RexBuilder(new JavaTypeFactoryImpl(SubstraitTypeSystem.TYPE_SYSTEM)); - HepProgram program = HepProgram.builder().build(); - RelOptPlanner emptyPlanner = new HepPlanner(program); - return RelOptCluster.create(emptyPlanner, rexBuilder); - } - - protected static RelNode removeRedundantProjects(RelNode root) { - // The Calcite RelBuilder, when constructing Project that does not modify its inputs in any way, - // simply elides it. The PROJECT_REMOVE rule can be used to remove such projects from Rel trees. - // This facilitates roundtrip testing. - HepProgram program = HepProgram.builder().addRuleInstance(CoreRules.PROJECT_REMOVE).build(); - HepPlanner planner = new HepPlanner(program); - planner.setRoot(root); - return planner.findBestExp(); - } - - protected static CalciteOperation convert( - SqlNode sqlNode, - Prepare.CatalogReader catalogReader, - SqlValidator validator, - RelOptCluster cluster) { - SqlToRelConverter converter = createSqlToRelConverter(validator, catalogReader, cluster); - CalciteOperationBuilder operationBuilder = - new CalciteOperationBuilder(converter, SubstraitSqlToCalcite::parseRelationalExpression); - return sqlNode.accept(operationBuilder); - } - /** * Converts a SQL statement to a Calcite {@link RelRoot}. * @@ -98,91 +36,14 @@ protected static CalciteOperation convert( * @return a {@link RelRoot} corresponding to the given SQL statement * @throws SqlParseException if there is an error while parsing the SQL statement */ - public static RelRoot convertRelationalQuery( - String sqlStatement, Prepare.CatalogReader catalogReader) throws SqlParseException { - return convertRelationalQuery( - sqlStatement, - catalogReader, - new SubstraitSqlValidator(catalogReader), - createDefaultParserConfig(), - createDefaultRelOptCluster()); - } - - /** - * Converts a SQL statement to a Calcite {@link RelRoot}. - * - * @param sqlStatement a SQL statement string - * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in - * the SQL statement - * @param validator the {@link SqlValidator} used to validate the SQL statement. Allows for - * additional control of SQL functions and operators via {@link - * SqlValidator#getOperatorTable()} - * @param parserConfig the {@link SqlParser.Config} used for parsing the Sql statement - * @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement - * processing. Calcite expects that the {@link RelOptCluster} used during statement processing - * is the same as that used during query optimization. - * @return a {@link RelRoot} corresponding to the given SQL statement - * @throws SqlParseException if there is an error while parsing the SQL statement - */ - public static RelRoot convertRelationalQuery( - String sqlStatement, - Prepare.CatalogReader catalogReader, - SqlValidator validator, - SqlParser.Config parserConfig, - RelOptCluster cluster) + public static RelRoot convertQuery(String sqlStatement, Prepare.CatalogReader catalogReader) throws SqlParseException { - CalciteOperation calciteOperation = - convertQuery(sqlStatement, catalogReader, validator, parserConfig, cluster); - if (calciteOperation instanceof RelationalOperation) { - return ((RelationalOperation) calciteOperation).getRelRoot(); - } - throw new IllegalArgumentException( - String.format("Non-relational algebra statement: %s", sqlStatement)); - } - - /** - * Converts a SQL statement to a CalciteOperation {@link CalciteOperation}. - * - * @param sqlStatement a SQL statement - * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in - * the SQL statement - * @return {@link CalciteOperation} corresponding to the given SQL statement - * @throws SqlParseException if there is an error while parsing the SQL statement string - */ - public static CalciteOperation convertQuery( - String sqlStatement, Prepare.CatalogReader catalogReader) throws SqlParseException { - return convertQuery( - sqlStatement, - catalogReader, - new SubstraitSqlValidator(catalogReader), - createDefaultParserConfig(), - createDefaultRelOptCluster()); - } - - /** - * Converts a parsed sql statement (SqlNode) to a CalciteOperation {@link CalciteOperation}. - * - * @param sqlNode a parsed SQL statement - * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in - * the SQL statement - * @param validator the {@link SqlValidator} used to validate the SQL statement. Allows for - * additional control of SQL functions and operators via {@link - * SqlValidator#getOperatorTable()} - * @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement - * processing. Calcite expects that the {@link RelOptCluster} used during statement processing - * is the same as that used during query optimization. - * @return {@link CalciteOperation} corresponding to the given SQL statement - */ - public static CalciteOperation convertQuery( - SqlNode sqlNode, - Prepare.CatalogReader catalogReader, - SqlValidator validator, - RelOptCluster cluster) { - return convert(sqlNode, catalogReader, validator, cluster); + SqlValidator validator = new SubstraitSqlValidator(catalogReader); + return convertQuery(sqlStatement, catalogReader, validator, createDefaultRelOptCluster()); } /** - * Converts a SQL statement to a CalciteOperation {@link CalciteOperation}. + * Converts a SQL statement to a Calcite {@link RelRoot}. * * @param sqlStatement a SQL statement * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in @@ -190,27 +51,26 @@ public static CalciteOperation convertQuery( * @param validator the {@link SqlValidator} used to validate the SQL statement. Allows for * additional control of SQL functions and operators via {@link * SqlValidator#getOperatorTable()} - * @param parserConfig the {@link SqlParser.Config} used for parsing the Sql statement * @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement * processing. Calcite expects that the {@link RelOptCluster} used during statement processing * is the same as that used during query optimization. - * @return {@link CalciteOperation} corresponding to the given SQL statement + * @return {@link RelRoot} corresponding to the given SQL statement * @throws SqlParseException if there is an error while parsing the SQL statement string */ - public static CalciteOperation convertQuery( + public static RelRoot convertQuery( String sqlStatement, Prepare.CatalogReader catalogReader, SqlValidator validator, - SqlParser.Config parserConfig, RelOptCluster cluster) throws SqlParseException { - List sqlNodes = - SubstraitSqlStatementParser.parseStatements(sqlStatement, parserConfig); + List sqlNodes = SubstraitSqlStatementParser.parseStatements(sqlStatement); if (sqlNodes.size() != 1) { throw new IllegalArgumentException( - String.format("Expected one statement, found %d", sqlNodes.size())); + String.format("Expected one statement, found: %d", sqlNodes.size())); } - return convertQuery(sqlNodes.get(0), catalogReader, validator, cluster); + List relRoots = convert(sqlNodes, catalogReader, validator, cluster); + // as there was only 1 statement, there should only be 1 root + return relRoots.get(0); } /** @@ -223,15 +83,10 @@ public static CalciteOperation convertQuery( * @return a list of {@link RelRoot}s corresponding to the given SQL statements * @throws SqlParseException if there is an error while parsing the SQL statements */ - public static List convertRelationalQueries( + public static List convertQueries( String sqlStatements, Prepare.CatalogReader catalogReader) throws SqlParseException { SqlValidator validator = new SubstraitSqlValidator(catalogReader); - return convertRelationalQueries( - sqlStatements, - catalogReader, - validator, - createDefaultParserConfig(), - createDefaultRelOptCluster()); + return convertQueries(sqlStatements, catalogReader, validator, createDefaultRelOptCluster()); } /** @@ -244,48 +99,73 @@ public static List convertRelationalQueries( * @param validator the {@link SqlValidator} used to validate SQL statements. Allows for * additional control of SQL functions and operators via {@link * SqlValidator#getOperatorTable()} - * @param parserConfig the {@link SqlParser.Config} used for parsing the Sql statement * * @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement * processing. Calcite expects that the {@link RelOptCluster} used during statement processing * is the same as that used during query optimization. * @return a list of {@link RelRoot}s corresponding to the given SQL statements * @throws SqlParseException if there is an error while parsing the SQL statements */ - public static List convertRelationalQueries( + public static List convertQueries( String sqlStatements, Prepare.CatalogReader catalogReader, SqlValidator validator, - SqlParser.Config parserConfig, RelOptCluster cluster) throws SqlParseException { - List sqlNodes = - SubstraitSqlStatementParser.parseStatements(sqlStatements, parserConfig); + List sqlNodes = SubstraitSqlStatementParser.parseStatements(sqlStatements); + return convert(sqlNodes, catalogReader, validator, cluster); + } + + static List convert( + List sqlNodes, + Prepare.CatalogReader catalogReader, + SqlValidator validator, + RelOptCluster cluster) { + RelOptTable.ViewExpander viewExpander = null; + SqlToRelConverter converter = + new SqlToRelConverter( + viewExpander, + validator, + catalogReader, + cluster, + StandardConvertletTable.INSTANCE, + SqlToRelConverter.CONFIG); + // apply validation + boolean needsValidation = true; + // query is the root of the tree + boolean top = true; + DdlSqlToRelConverter ddlSqlToRelConverter = new DdlSqlToRelConverter(converter); return sqlNodes.stream() - .map(sqlNode -> convert(sqlNode, catalogReader, validator, cluster)) - .filter(calciteOperation -> calciteOperation instanceof RelationalOperation) - .map(calciteOperation -> ((RelationalOperation) calciteOperation).getRelRoot()) - .collect(Collectors.toUnmodifiableList()); + .map( + sqlNode -> { + RelRoot relRoot = sqlNode.accept(ddlSqlToRelConverter); + if (relRoot == null) { + relRoot = + removeRedundantProjects(converter.convertQuery(sqlNode, needsValidation, top)); + } + return relRoot; + }) + .collect(Collectors.toList()); } - /** - * Converts one or more SQL statements to a List of {@link CalciteOperation}, with one {@link - * CalciteOperation} per statement. - * - * @param sqlStatements a string containing one or more SQL statements - * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in - * the SQL statements - * @return a list of {@link CalciteOperation}s corresponding to the given SQL statements - * @throws SqlParseException if there is an error while parsing the SQL statements - */ - public static List convertQueries( - String sqlStatements, Prepare.CatalogReader catalogReader) throws SqlParseException { - List sqlNodes = - SubstraitSqlStatementParser.parseStatements(sqlStatements, createDefaultParserConfig()); - SqlValidator validator = new SubstraitSqlValidator(catalogReader); - RelOptCluster cluster = createDefaultRelOptCluster(); + static RelOptCluster createDefaultRelOptCluster() { + RexBuilder rexBuilder = + new RexBuilder(new JavaTypeFactoryImpl(SubstraitTypeSystem.TYPE_SYSTEM)); + HepProgram program = HepProgram.builder().build(); + RelOptPlanner emptyPlanner = new HepPlanner(program); + return RelOptCluster.create(emptyPlanner, rexBuilder); + } - return sqlNodes.stream() - .map(sqlNode -> convert(sqlNode, catalogReader, validator, cluster)) - .collect(Collectors.toUnmodifiableList()); + static RelRoot removeRedundantProjects(RelRoot root) { + return root.withRel(removeRedundantProjects(root.rel)); + } + + static RelNode removeRedundantProjects(RelNode root) { + // The Calcite RelBuilder, when constructing Project that does not modify its inputs in any way, + // simply elides it. The PROJECT_REMOVE rule can be used to remove such projects from Rel trees. + // This facilitates roundtrip testing. + HepProgram program = HepProgram.builder().addRuleInstance(CoreRules.PROJECT_REMOVE).build(); + HepPlanner planner = new HepPlanner(program); + planner.setRoot(root); + return planner.findBestExp(); } } diff --git a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java index e817e9c9c..3c054cee5 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java @@ -1,13 +1,6 @@ package io.substrait.isthmus; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import io.substrait.isthmus.operation.CalciteOperation; import io.substrait.isthmus.sql.SubstraitCreateStatementParser; -import io.substrait.plan.Plan; -import io.substrait.plan.PlanProtoConverter; -import io.substrait.plan.ProtoPlanConverter; -import io.substrait.relation.Rel; import org.apache.calcite.prepare.Prepare; import org.apache.calcite.sql.parser.SqlParseException; import org.junit.jupiter.api.Test; @@ -22,65 +15,15 @@ public DdlRoundtripTest() throws SqlParseException { super(); } - void testSqlToSubstrait(String sqlStatement) throws SqlParseException { - SqlToSubstrait sqlToSubstrait = new SqlToSubstrait(); - io.substrait.proto.Plan protoPlan = sqlToSubstrait.execute(sqlStatement, catalogReader); - Plan plan = new ProtoPlanConverter().from(protoPlan); - io.substrait.proto.Plan protoPlan1 = new PlanProtoConverter().toProto(plan); - assertEquals(protoPlan, protoPlan1); - } - - protected void assertCalciteOperationSubstraitRelRoundTrip( - String query, Prepare.CatalogReader catalogReader) throws Exception { - // sql <--> substrait round trip test. - // Assert (sql -> calcite -> substrait) and (sql -> substrait -> calcite -> substrait) are same. - // Return list of sql -> Substrait rel -> Calcite rel. - SqlToSubstrait s = new SqlToSubstrait(); - - // 1. SQL -> Substrait Plan - Plan plan1 = s.convert(query, catalogReader); - - // 2. Substrait Plan -> Substrait Rel - Plan.Root pojo1 = plan1.getRoots().get(0); - - // 3. Substrait Rel -> CalciteOperation - - SubstraitToCalciteOperation substraitToCalciteOperation = - new SubstraitToCalciteOperation(extensions, typeFactory, catalogReader, s.parserConfig); - Rel rel = plan1.getRoots().get(0).getInput(); - CalciteOperation calciteOperation = - rel.accept(substraitToCalciteOperation, SubstraitRelNodeConverter.Context.newContext()); - - // 4. CalciteOperation -> Substrait Rel - CalciteOperationToSubstrait calciteOperationToSubstrait = - new CalciteOperationToSubstrait(extensions, s.featureBoard); - Plan.Root pojo2 = calciteOperation.accept(calciteOperationToSubstrait); - - assertEquals(pojo1, pojo2); - } - - void testPlanRoundTrip(String sqlStatement) throws Exception { - SqlToSubstrait sql2subst = new SqlToSubstrait(); - final Plan plan = sql2subst.convert(sqlStatement, catalogReader); - - assertPlanRoundtrip(plan); - assertCalciteOperationSubstraitRelRoundTrip(sqlStatement, catalogReader); - - // assertFullRoundTrip(sqlStatement, catalogReader); - } - @Test void testCreateTable() throws Exception { String sql = "create table dst1 as select * from src1"; - testSqlToSubstrait(sql); - // TBD: full roundtrip is not possible because there is no relational algebra for DDL - testPlanRoundTrip(sql); + assertFullRoundTripWorkaroundOptimizer(sql, catalogReader); } @Test void testCreateView() throws Exception { String sql = "create view dst1 as select * from src1"; - testSqlToSubstrait(sql); - testPlanRoundTrip(sql); + assertFullRoundTripWorkaroundOptimizer(sql, catalogReader); } } diff --git a/isthmus/src/test/java/io/substrait/isthmus/DmlRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/DmlRoundtripTest.java index e1408f658..5db7db71c 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DmlRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DmlRoundtripTest.java @@ -16,18 +16,20 @@ public DmlRoundtripTest() throws SqlParseException {} @Test void testDelete() throws SqlParseException { - assertFullRoundTrip("delete from src1 where intcol=10", catalogReader); + assertFullRoundTripWorkaroundOptimizer("delete from src1 where intcol=10", catalogReader); } @Test void testUpdate() throws SqlParseException { - assertFullRoundTrip("update src1 set intcol=10 where charcol='a'", catalogReader); + assertFullRoundTripWorkaroundOptimizer( + "update src1 set intcol=10 where charcol='a'", catalogReader); } @Test void testInsert() throws SqlParseException { - assertFullRoundTrip("insert into src1 (intcol, charcol) values (1,'a'); ", catalogReader); - assertFullRoundTrip( + assertFullRoundTripWorkaroundOptimizer( + "insert into src1 (intcol, charcol) values (1,'a'); ", catalogReader); + assertFullRoundTripWorkaroundOptimizer( "insert into src1 (intcol, charcol) select intcol,charcol from src2;", catalogReader); } } diff --git a/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java index 975acb86d..d4671736d 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java @@ -26,7 +26,8 @@ void preserveNamesFromSql() throws Exception { List expectedNames = List.of("a", "B"); org.apache.calcite.rel.RelRoot calciteRelRoot1 = - SubstraitSqlToCalcite.convertRelationalQuery(query, catalogReader); + SubstraitSqlToCalcite.convertQuery(query, catalogReader); + assertEquals(expectedNames, calciteRelRoot1.validatedRowType.getFieldNames()); io.substrait.plan.Plan.Root substraitRelRoot = diff --git a/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java b/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java index 305c051ac..fd9572b2e 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java @@ -23,7 +23,7 @@ void conversionHandlesBuiltInSum0CallAddedByRule() throws SqlParseException, IOE // verify that the query works generally assertFullRoundTrip(query); - RelRoot relRoot = SubstraitSqlToCalcite.convertRelationalQuery(query, TPCH_CATALOG); + RelRoot relRoot = SubstraitSqlToCalcite.convertQuery(query, TPCH_CATALOG); RelNode originalPlan = relRoot.rel; // Create a program to apply the AGGREGATE_EXPAND_DISTINCT_AGGREGATES_TO_JOIN rule. diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index 265546e27..fff50bf47 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -178,7 +178,8 @@ protected void assertFullRoundTrip(String sqlQuery, Prepare.CatalogReader catalo ExtensionCollector extensionCollector = new ExtensionCollector(); // SQL -> Calcite 1 - RelRoot calcite1 = SubstraitSqlToCalcite.convertRelationalQuery(sqlQuery, catalogReader); + // RelRoot calcite1 = SubstraitSqlToCalcite.convertRelationalQuery(sqlQuery, catalogReader); + RelRoot calcite1 = SubstraitSqlToCalcite.convertQuery(sqlQuery, catalogReader); // Calcite 1 -> Substrait POJO 1 Plan.Root root1 = SubstraitRelVisitor.convert(calcite1, extensions); @@ -208,6 +209,71 @@ protected void assertFullRoundTrip(String sqlQuery, Prepare.CatalogReader catalo assertEquals(root1, root3); } + /** + * Verifies that the given query can be converted from its Calcite form to the Substrait Proto + * representation and back. Since calcite optimizer (sql->calcite) and calcite RelBuilder might + * use different optimization (plus it is hard to disable optimization for the latter), calcite + * needs to be obtained after processing with RelBuilder. Therefore, a preparation step is + * required + * + *

Preparation: + * SQL -> Calcite 0 -> Substrait POJO 0 -> Substrait Proto 0 -> Substrait POJO 1 -> Calcite 1 + * this code also checks that: Main cycle: + * + *

    + *
  • Substrait POJO 0 == Substrait POJO 1 + *
+ * + * Calcite 1 -> Substrait POJO 2 -> Substrait Proto 2 -> Substrait POJO 3 -> Calcite 2 -> + * Substrait POJO 4 + * + *
    + *
  • Substrait POJO 2 == Substrait POJO 4 + *
+ */ + protected void assertFullRoundTripWorkaroundOptimizer( + String sqlQuery, Prepare.CatalogReader catalogReader) throws SqlParseException { + ExtensionCollector extensionCollector = new ExtensionCollector(); + + // preparation: SQL -> Calcite 0 + RelRoot calcite0 = SubstraitSqlToCalcite.convertQuery(sqlQuery, catalogReader); + + // Calcite 0 -> Substrait POJO 0 + Plan.Root root0 = SubstraitRelVisitor.convert(calcite0, extensions); + + // Substrait POJO 0 -> Substrait Proto 0 + io.substrait.proto.RelRoot proto0 = new RelProtoConverter(extensionCollector).toProto(root0); + + // Substrait Proto -> Substrait POJO 1 + Plan.Root root1 = new ProtoRelConverter(extensionCollector, extensions).from(proto0); + + // Verify that POJOs are the same + assertEquals(root0, root1); + + final SubstraitToCalcite substraitToCalcite = + new SubstraitToCalcite(extensions, typeFactory, catalogReader); + // Substrait POJO 1 -> Calcite 1 + RelRoot calcite1 = substraitToCalcite.convert(root1); + // end of preparation + + // Calcite 1 -> Substrait POJO 2 + Plan.Root root2 = SubstraitRelVisitor.convert(calcite1, extensions); + + // Substrait POJO 2 -> Substrait Proto 1 + io.substrait.proto.RelRoot proto1 = new RelProtoConverter(extensionCollector).toProto(root2); + + // Substrait Proto1 -> Substrait POJO 3 + Plan.Root root3 = new ProtoRelConverter(extensionCollector, extensions).from(proto1); + + // Substrait POJO 3 -> Calcite 2 + RelRoot calcite2 = substraitToCalcite.convert(root3); + // Calcite 2 -> Substrait POJO 4 + Plan.Root root4 = SubstraitRelVisitor.convert(calcite2, extensions); + + // Verify that POJOs are the same + assertEquals(root2, root4); + } + /** * Verifies that the given POJO can be converted: * From 0dc6eb396dd5956dce034146a9257c836964001e Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Wed, 17 Sep 2025 15:18:21 +0200 Subject: [PATCH 09/17] chore(isthmus): encapsulate ddml validation --- .../java/io/substrait/util/NoException.java | 5 ++ .../isthmus/SubstraitRelNodeConverter.java | 29 ++++--- ...ubstraitRelNodeConverterDdmlValidator.java | 78 +++++++++++++++++++ .../substrait/isthmus/ValidationResult.java | 30 +++++++ 4 files changed, 131 insertions(+), 11 deletions(-) create mode 100644 core/src/main/java/io/substrait/util/NoException.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverterDdmlValidator.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/ValidationResult.java diff --git a/core/src/main/java/io/substrait/util/NoException.java b/core/src/main/java/io/substrait/util/NoException.java new file mode 100644 index 000000000..683e219e0 --- /dev/null +++ b/core/src/main/java/io/substrait/util/NoException.java @@ -0,0 +1,5 @@ +package io.substrait.util; + +public final class NoException extends RuntimeException { + private NoException() {} +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java index 91e39a73d..b435e0c0a 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java @@ -94,6 +94,7 @@ public class SubstraitRelNodeConverter protected final RelBuilder relBuilder; protected final RexBuilder rexBuilder; private final TypeConverter typeConverter; + private final SubstraitRelNodeConverterDdmlValidator substraitRelNodeConverterDdmlValidator; public SubstraitRelNodeConverter( SimpleExtension.ExtensionCollection extensions, @@ -142,6 +143,8 @@ public SubstraitRelNodeConverter( this.aggregateFunctionConverter = aggregateFunctionConverter; this.expressionRexConverter = expressionRexConverter; this.expressionRexConverter.setRelNodeConverter(this); + this.substraitRelNodeConverterDdmlValidator = + new SubstraitRelNodeConverterDdmlValidator(relBuilder, this); } public static RelNode convert( @@ -554,8 +557,11 @@ public RelNode visit(NamedUpdate update, Context context) { @Override public RelNode visit(NamedDdl namedDdl, Context context) { - if (namedDdl.getViewDefinition().isEmpty()) { - throw new IllegalArgumentException("no view definition found"); + final ValidationResult validationResult = + namedDdl.accept(substraitRelNodeConverterDdmlValidator, context); + if (!validationResult.isValid()) { + throw new IllegalArgumentException( + String.join(System.lineSeparator(), validationResult.getMessages())); } Rel viewDefinition = namedDdl.getViewDefinition().get(); RelNode relNode = viewDefinition.accept(this, context); @@ -565,7 +571,6 @@ public RelNode visit(NamedDdl namedDdl, Context context) { @Override public RelNode visit(VirtualTableScan virtualTableScan, Context context) { - final RelDataType typeInfoOnly = typeConverter.toCalcite(typeFactory, virtualTableScan.getInitialSchema().struct()); @@ -609,6 +614,12 @@ private RelNode handleCreateTableAs(NamedWrite namedWrite, Context context) { @Override public RelNode visit(NamedWrite write, Context context) { + final ValidationResult validationResult = + write.accept(substraitRelNodeConverterDdmlValidator, context); + if (!validationResult.isValid()) { + throw new IllegalArgumentException( + String.join(System.lineSeparator(), validationResult.getMessages())); + } RelNode input = write.getInput().accept(this, context); assert relBuilder.getRelOptSchema() != null; final RelOptTable targetTable = @@ -625,16 +636,12 @@ public RelNode visit(NamedWrite write, Context context) { case CTAS: return handleCreateTableAs(write, context); default: - throw new UnsupportedOperationException( - "Write operation '" - + write.getOperation() - + "' is not supported by the NamedWrite visitor. " - + "Check if a more specific relation type (e.g., NamedUpdate) should be used."); + // checked by validation + throw new IllegalArgumentException("Couldn't determine operation"); } - if (targetTable == null) { - throw new IllegalStateException("Table not found in Calcite catalog: " + write.getNames()); - } + // checked by validation + assert targetTable != null; return LogicalTableModify.create( targetTable, diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverterDdmlValidator.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverterDdmlValidator.java new file mode 100644 index 000000000..dd6a48292 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverterDdmlValidator.java @@ -0,0 +1,78 @@ +package io.substrait.isthmus; + +import io.substrait.relation.AbstractDdlRel; +import io.substrait.relation.AbstractRelVisitor; +import io.substrait.relation.AbstractWriteRel; +import io.substrait.relation.NamedDdl; +import io.substrait.relation.NamedWrite; +import io.substrait.relation.Rel; +import io.substrait.util.NoException; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; +import org.apache.calcite.tools.RelBuilder; + +public class SubstraitRelNodeConverterDdmlValidator + extends AbstractRelVisitor { + private static final Set NAMED_WRITE_OPERATIONS_SUPPORTED = + Set.of( + AbstractWriteRel.WriteOp.INSERT, + AbstractWriteRel.WriteOp.DELETE, + AbstractWriteRel.WriteOp.UPDATE, + AbstractWriteRel.WriteOp.CTAS); + + private final RelBuilder relBuilder; + private final SubstraitRelNodeConverter substraitRelNodeConverter; + + public SubstraitRelNodeConverterDdmlValidator( + final RelBuilder relBuilder, SubstraitRelNodeConverter substraitRelNodeConverter) { + this.relBuilder = relBuilder; + this.substraitRelNodeConverter = substraitRelNodeConverter; + } + + @Override + public ValidationResult visitFallback(Rel rel, SubstraitRelNodeConverter.Context context) { + return ValidationResult.ok(); + } + + @Override + public ValidationResult visit(NamedDdl namedDdl, SubstraitRelNodeConverter.Context ctx) { + final List validationMessages = new LinkedList<>(); + if (namedDdl.getViewDefinition().isEmpty()) { + validationMessages.add("No view definition found"); + } + if (!AbstractDdlRel.DdlObject.VIEW.equals(namedDdl.getObject())) { + validationMessages.add("Unsupported object: " + namedDdl.getObject().name()); + } + if (!AbstractDdlRel.DdlOp.CREATE.equals(namedDdl.getOperation())) { + validationMessages.add("Unsupported operation: " + namedDdl.getOperation().name()); + } + return validationMessages.isEmpty() + ? ValidationResult.SUCCESS + : ValidationResult.error(validationMessages); + } + + @Override + public ValidationResult visit(NamedWrite namedWrite, SubstraitRelNodeConverter.Context ctx) { + final List validationMessages = new LinkedList<>(); + if (!NAMED_WRITE_OPERATIONS_SUPPORTED.contains(namedWrite.getOperation())) { + validationMessages.add( + "Write operation '" + + namedWrite.getOperation() + + "' is not supported by the NamedWrite visitor. " + + "Check if a more specific relation type (e.g., NamedUpdate) should be used."); + } + if (!AbstractWriteRel.WriteOp.CTAS.equals(namedWrite.getOperation()) + && (relBuilder.getRelOptSchema() == null + || relBuilder.getRelOptSchema().getTableForMember(namedWrite.getNames()) == null)) { + validationMessages.add("Target table not found in Calcite catalog: " + namedWrite.getNames()); + } + + if (namedWrite.getInput().accept(substraitRelNodeConverter, ctx) == null) { + validationMessages.add("Only write operations with input supported"); + } + return validationMessages.isEmpty() + ? ValidationResult.SUCCESS + : ValidationResult.error(validationMessages); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/ValidationResult.java b/isthmus/src/main/java/io/substrait/isthmus/ValidationResult.java new file mode 100644 index 000000000..2e458bc26 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/ValidationResult.java @@ -0,0 +1,30 @@ +package io.substrait.isthmus; + +import java.util.List; + +public class ValidationResult { + public static final ValidationResult SUCCESS = ok(); + private final boolean isValid; + private final List messages; + + public ValidationResult(boolean isValid, List messages) { + this.isValid = isValid; + this.messages = messages; + } + + public static ValidationResult ok() { + return new ValidationResult(true, List.of("OK")); + } + + public static ValidationResult error(List messages) { + return new ValidationResult(false, messages); + } + + public boolean isValid() { + return isValid; + } + + public List getMessages() { + return messages; + } +} From 0046f9f797f4127fa4724a9d67b273e958466917 Mon Sep 17 00:00:00 2001 From: Victor Barua Date: Wed, 17 Sep 2025 16:14:56 -0700 Subject: [PATCH 10/17] refactor: simplify DDL validation --- .../java/io/substrait/util/NoException.java | 5 -- .../isthmus/SubstraitRelNodeConverter.java | 47 +++++++---- ...ubstraitRelNodeConverterDdmlValidator.java | 78 ------------------- .../substrait/isthmus/ValidationResult.java | 30 ------- 4 files changed, 31 insertions(+), 129 deletions(-) delete mode 100644 core/src/main/java/io/substrait/util/NoException.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverterDdmlValidator.java delete mode 100644 isthmus/src/main/java/io/substrait/isthmus/ValidationResult.java diff --git a/core/src/main/java/io/substrait/util/NoException.java b/core/src/main/java/io/substrait/util/NoException.java deleted file mode 100644 index 683e219e0..000000000 --- a/core/src/main/java/io/substrait/util/NoException.java +++ /dev/null @@ -1,5 +0,0 @@ -package io.substrait.util; - -public final class NoException extends RuntimeException { - private NoException() {} -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java index b435e0c0a..9ef4540e9 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java @@ -16,8 +16,10 @@ import io.substrait.isthmus.expression.ExpressionRexConverter; import io.substrait.isthmus.expression.ScalarFunctionConverter; import io.substrait.isthmus.expression.WindowFunctionConverter; +import io.substrait.relation.AbstractDdlRel; import io.substrait.relation.AbstractRelVisitor; import io.substrait.relation.AbstractUpdate; +import io.substrait.relation.AbstractWriteRel; import io.substrait.relation.Aggregate; import io.substrait.relation.Cross; import io.substrait.relation.EmptyScan; @@ -94,7 +96,6 @@ public class SubstraitRelNodeConverter protected final RelBuilder relBuilder; protected final RexBuilder rexBuilder; private final TypeConverter typeConverter; - private final SubstraitRelNodeConverterDdmlValidator substraitRelNodeConverterDdmlValidator; public SubstraitRelNodeConverter( SimpleExtension.ExtensionCollection extensions, @@ -143,8 +144,6 @@ public SubstraitRelNodeConverter( this.aggregateFunctionConverter = aggregateFunctionConverter; this.expressionRexConverter = expressionRexConverter; this.expressionRexConverter.setRelNodeConverter(this); - this.substraitRelNodeConverterDdmlValidator = - new SubstraitRelNodeConverterDdmlValidator(relBuilder, this); } public static RelNode convert( @@ -557,12 +556,21 @@ public RelNode visit(NamedUpdate update, Context context) { @Override public RelNode visit(NamedDdl namedDdl, Context context) { - final ValidationResult validationResult = - namedDdl.accept(substraitRelNodeConverterDdmlValidator, context); - if (!validationResult.isValid()) { - throw new IllegalArgumentException( - String.join(System.lineSeparator(), validationResult.getMessages())); + if (namedDdl.getOperation() != AbstractDdlRel.DdlOp.CREATE + || namedDdl.getObject() != AbstractDdlRel.DdlObject.VIEW) { + throw new UnsupportedOperationException( + String.format( + "Can only handle NamedDdl with (%s, %s), given (%s, %s)", + AbstractDdlRel.DdlOp.CREATE, + AbstractDdlRel.DdlObject.VIEW, + namedDdl.getOperation(), + namedDdl.getObject())); } + + if (namedDdl.getViewDefinition().isEmpty()) { + throw new IllegalArgumentException("NamedDdl view definition must be set"); + } + Rel viewDefinition = namedDdl.getViewDefinition().get(); RelNode relNode = viewDefinition.accept(this, context); RelRoot relRoot = RelRoot.of(relNode, SqlKind.SELECT); @@ -606,6 +614,17 @@ public RelNode visit(VirtualTableScan virtualTableScan, Context context) { } private RelNode handleCreateTableAs(NamedWrite namedWrite, Context context) { + if (namedWrite.getCreateMode() != AbstractWriteRel.CreateMode.REPLACE_IF_EXISTS + || namedWrite.getOutputMode() != AbstractWriteRel.OutputMode.NO_OUTPUT) { + throw new UnsupportedOperationException( + String.format( + "Can only handle CTAS NamedWrite with (%s, %s), given (%s, %s)", + AbstractWriteRel.CreateMode.REPLACE_IF_EXISTS, + AbstractWriteRel.OutputMode.NO_OUTPUT, + namedWrite.getCreateMode(), + namedWrite.getOutputMode())); + } + Rel input = namedWrite.getInput(); RelNode relNode = input.accept(this, context); RelRoot relRoot = RelRoot.of(relNode, SqlKind.SELECT); @@ -614,12 +633,6 @@ private RelNode handleCreateTableAs(NamedWrite namedWrite, Context context) { @Override public RelNode visit(NamedWrite write, Context context) { - final ValidationResult validationResult = - write.accept(substraitRelNodeConverterDdmlValidator, context); - if (!validationResult.isValid()) { - throw new IllegalArgumentException( - String.join(System.lineSeparator(), validationResult.getMessages())); - } RelNode input = write.getInput().accept(this, context); assert relBuilder.getRelOptSchema() != null; final RelOptTable targetTable = @@ -636,8 +649,10 @@ public RelNode visit(NamedWrite write, Context context) { case CTAS: return handleCreateTableAs(write, context); default: - // checked by validation - throw new IllegalArgumentException("Couldn't determine operation"); + throw new UnsupportedOperationException( + String.format( + "NamedWrite with WriteOp %s cannot be converted to a Calcite RelNode. Consider using a more specific Rel (e.g NamedUpdate)", + write.getOperation())); } // checked by validation diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverterDdmlValidator.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverterDdmlValidator.java deleted file mode 100644 index dd6a48292..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverterDdmlValidator.java +++ /dev/null @@ -1,78 +0,0 @@ -package io.substrait.isthmus; - -import io.substrait.relation.AbstractDdlRel; -import io.substrait.relation.AbstractRelVisitor; -import io.substrait.relation.AbstractWriteRel; -import io.substrait.relation.NamedDdl; -import io.substrait.relation.NamedWrite; -import io.substrait.relation.Rel; -import io.substrait.util.NoException; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; -import org.apache.calcite.tools.RelBuilder; - -public class SubstraitRelNodeConverterDdmlValidator - extends AbstractRelVisitor { - private static final Set NAMED_WRITE_OPERATIONS_SUPPORTED = - Set.of( - AbstractWriteRel.WriteOp.INSERT, - AbstractWriteRel.WriteOp.DELETE, - AbstractWriteRel.WriteOp.UPDATE, - AbstractWriteRel.WriteOp.CTAS); - - private final RelBuilder relBuilder; - private final SubstraitRelNodeConverter substraitRelNodeConverter; - - public SubstraitRelNodeConverterDdmlValidator( - final RelBuilder relBuilder, SubstraitRelNodeConverter substraitRelNodeConverter) { - this.relBuilder = relBuilder; - this.substraitRelNodeConverter = substraitRelNodeConverter; - } - - @Override - public ValidationResult visitFallback(Rel rel, SubstraitRelNodeConverter.Context context) { - return ValidationResult.ok(); - } - - @Override - public ValidationResult visit(NamedDdl namedDdl, SubstraitRelNodeConverter.Context ctx) { - final List validationMessages = new LinkedList<>(); - if (namedDdl.getViewDefinition().isEmpty()) { - validationMessages.add("No view definition found"); - } - if (!AbstractDdlRel.DdlObject.VIEW.equals(namedDdl.getObject())) { - validationMessages.add("Unsupported object: " + namedDdl.getObject().name()); - } - if (!AbstractDdlRel.DdlOp.CREATE.equals(namedDdl.getOperation())) { - validationMessages.add("Unsupported operation: " + namedDdl.getOperation().name()); - } - return validationMessages.isEmpty() - ? ValidationResult.SUCCESS - : ValidationResult.error(validationMessages); - } - - @Override - public ValidationResult visit(NamedWrite namedWrite, SubstraitRelNodeConverter.Context ctx) { - final List validationMessages = new LinkedList<>(); - if (!NAMED_WRITE_OPERATIONS_SUPPORTED.contains(namedWrite.getOperation())) { - validationMessages.add( - "Write operation '" - + namedWrite.getOperation() - + "' is not supported by the NamedWrite visitor. " - + "Check if a more specific relation type (e.g., NamedUpdate) should be used."); - } - if (!AbstractWriteRel.WriteOp.CTAS.equals(namedWrite.getOperation()) - && (relBuilder.getRelOptSchema() == null - || relBuilder.getRelOptSchema().getTableForMember(namedWrite.getNames()) == null)) { - validationMessages.add("Target table not found in Calcite catalog: " + namedWrite.getNames()); - } - - if (namedWrite.getInput().accept(substraitRelNodeConverter, ctx) == null) { - validationMessages.add("Only write operations with input supported"); - } - return validationMessages.isEmpty() - ? ValidationResult.SUCCESS - : ValidationResult.error(validationMessages); - } -} diff --git a/isthmus/src/main/java/io/substrait/isthmus/ValidationResult.java b/isthmus/src/main/java/io/substrait/isthmus/ValidationResult.java deleted file mode 100644 index 2e458bc26..000000000 --- a/isthmus/src/main/java/io/substrait/isthmus/ValidationResult.java +++ /dev/null @@ -1,30 +0,0 @@ -package io.substrait.isthmus; - -import java.util.List; - -public class ValidationResult { - public static final ValidationResult SUCCESS = ok(); - private final boolean isValid; - private final List messages; - - public ValidationResult(boolean isValid, List messages) { - this.isValid = isValid; - this.messages = messages; - } - - public static ValidationResult ok() { - return new ValidationResult(true, List.of("OK")); - } - - public static ValidationResult error(List messages) { - return new ValidationResult(false, messages); - } - - public boolean isValid() { - return isValid; - } - - public List getMessages() { - return messages; - } -} From f27501f1b5b4aaa1b8796914722d172216a7e936 Mon Sep 17 00:00:00 2001 From: Victor Barua Date: Wed, 17 Sep 2025 16:16:05 -0700 Subject: [PATCH 11/17] refactor: remove unecessary comments --- .../main/java/io/substrait/isthmus/calcite/rel/CreateTable.java | 1 - .../main/java/io/substrait/isthmus/calcite/rel/CreateView.java | 1 - 2 files changed, 2 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java index ff8bab06a..b8a549fd4 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java @@ -27,7 +27,6 @@ public CreateTable(List names, RelRoot input) { @Override protected RelDataType deriveRowType() { - // return new DdlRelDataType(); return input.validatedRowType; } diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java index 819d65ba6..90360a4b3 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java @@ -17,7 +17,6 @@ public CreateView(List names, RelRoot input) { @Override protected RelDataType deriveRowType() { - // return new DdlRelDataType(); return input.validatedRowType; } From bdc93a27ba14ffe16c36b33869bd0e325d33f9d4 Mon Sep 17 00:00:00 2001 From: Victor Barua Date: Wed, 17 Sep 2025 16:24:42 -0700 Subject: [PATCH 12/17] refactor: remove setters --- .../isthmus/calcite/rel/CreateTable.java | 16 ++++------------ .../isthmus/calcite/rel/CreateView.java | 12 ++---------- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java index b8a549fd4..ca2881ddf 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java @@ -7,16 +7,8 @@ public class CreateTable extends AbstractRelNode { - private List names; - private RelRoot input; - - public RelRoot getInput() { - return input; - } - - public void setInput(RelRoot input) { - this.input = input; - } + private final List names; + private final RelRoot input; public CreateTable(List names, RelRoot input) { super(input.rel.getCluster(), input.rel.getTraitSet()); @@ -34,7 +26,7 @@ public List getNames() { return names; } - public void setNames(List names) { - this.names = names; + public RelRoot getInput() { + return input; } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java index 90360a4b3..2b7b86696 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java @@ -6,8 +6,8 @@ import org.apache.calcite.rel.type.RelDataType; public class CreateView extends AbstractRelNode { - private List names; - private RelRoot input; + private final List names; + private final RelRoot input; public CreateView(List names, RelRoot input) { super(input.rel.getCluster(), input.rel.getTraitSet()); @@ -24,15 +24,7 @@ public List getNames() { return names; } - public void setNames(List names) { - this.names = names; - } - public RelRoot getInput() { return input; } - - public void setInput(RelRoot input) { - this.input = input; - } } From 08d99ca056e7c09d434135a0c4dd8ff591163a32 Mon Sep 17 00:00:00 2001 From: Victor Barua Date: Wed, 17 Sep 2025 17:18:11 -0700 Subject: [PATCH 13/17] refactor: tableName and viewName instead of names --- .../io/substrait/isthmus/SubstraitRelVisitor.java | 4 ++-- .../substrait/isthmus/calcite/rel/CreateTable.java | 12 +++++++----- .../io/substrait/isthmus/calcite/rel/CreateView.java | 12 +++++++----- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java index 88de080e2..4a86554f3 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java @@ -465,7 +465,7 @@ public Rel handleCreateTable(CreateTable createTable) { .operation(AbstractWriteRel.WriteOp.CTAS) .createMode(AbstractWriteRel.CreateMode.REPLACE_IF_EXISTS) .outputMode(AbstractWriteRel.OutputMode.NO_OUTPUT) - .names(createTable.getNames()) + .names(createTable.getTableName()) .build(); } @@ -481,7 +481,7 @@ public Rel handleCreateView(CreateView createView) { .tableDefaults(defaults) .operation(AbstractDdlRel.DdlOp.CREATE) .object(AbstractDdlRel.DdlObject.VIEW) - .names(createView.getNames()) + .names(createView.getViewName()) .build(); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java index ca2881ddf..32e5b49c1 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java @@ -2,18 +2,20 @@ import java.util.List; import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.type.RelDataType; public class CreateTable extends AbstractRelNode { - private final List names; + private final List tableName; private final RelRoot input; - public CreateTable(List names, RelRoot input) { + public CreateTable(List tableName, RelRoot input) { super(input.rel.getCluster(), input.rel.getTraitSet()); - this.names = names; + this.tableName = tableName; this.input = input; } @@ -22,8 +24,8 @@ protected RelDataType deriveRowType() { return input.validatedRowType; } - public List getNames() { - return names; + public List getTableName() { + return tableName; } public RelRoot getInput() { diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java index 2b7b86696..d5d53bce0 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java @@ -2,16 +2,18 @@ import java.util.List; import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.type.RelDataType; public class CreateView extends AbstractRelNode { - private final List names; + private final List viewName; private final RelRoot input; - public CreateView(List names, RelRoot input) { + public CreateView(List viewName, RelRoot input) { super(input.rel.getCluster(), input.rel.getTraitSet()); - this.names = names; + this.viewName = viewName; this.input = input; } @@ -20,8 +22,8 @@ protected RelDataType deriveRowType() { return input.validatedRowType; } - public List getNames() { - return names; + public List getViewName() { + return viewName; } public RelRoot getInput() { From b317325fd07715f705d52c039650225a7280402b Mon Sep 17 00:00:00 2001 From: Victor Barua Date: Wed, 17 Sep 2025 17:18:52 -0700 Subject: [PATCH 14/17] feat: enable .explain() on CreateTable and CreateView --- .../io/substrait/isthmus/calcite/rel/CreateTable.java | 10 ++++++++++ .../io/substrait/isthmus/calcite/rel/CreateView.java | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java index 32e5b49c1..a3e2560eb 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java @@ -24,6 +24,16 @@ protected RelDataType deriveRowType() { return input.validatedRowType; } + @Override + public RelWriter explainTerms(RelWriter pw) { + return super.explainTerms(pw).input("input", getInput().rel).item("tableName", getTableName()); + } + + @Override + public List getInputs() { + return List.of(input.rel); + } + public List getTableName() { return tableName; } diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java index d5d53bce0..57e8823e0 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java @@ -22,6 +22,16 @@ protected RelDataType deriveRowType() { return input.validatedRowType; } + @Override + public RelWriter explainTerms(RelWriter pw) { + return super.explainTerms(pw).input("input", getInput().rel).item("viewName", getViewName()); + } + + @Override + public List getInputs() { + return List.of(input.rel); + } + public List getViewName() { return viewName; } From 2d27fec73951b2c607f766f47487176fa527052b Mon Sep 17 00:00:00 2001 From: Victor Barua Date: Wed, 17 Sep 2025 17:54:36 -0700 Subject: [PATCH 15/17] refactor: remove commend --- isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java | 1 - 1 file changed, 1 deletion(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index fff50bf47..4c5448de8 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -178,7 +178,6 @@ protected void assertFullRoundTrip(String sqlQuery, Prepare.CatalogReader catalo ExtensionCollector extensionCollector = new ExtensionCollector(); // SQL -> Calcite 1 - // RelRoot calcite1 = SubstraitSqlToCalcite.convertRelationalQuery(sqlQuery, catalogReader); RelRoot calcite1 = SubstraitSqlToCalcite.convertQuery(sqlQuery, catalogReader); // Calcite 1 -> Substrait POJO 1 From d1af907c1fcc614056288b0af26f924f0a0a1979 Mon Sep 17 00:00:00 2001 From: Victor Barua Date: Wed, 17 Sep 2025 18:13:33 -0700 Subject: [PATCH 16/17] docs: clarify purpose of special test method --- .../substrait/isthmus/DdlRoundtripTest.java | 4 ++-- .../substrait/isthmus/DmlRoundtripTest.java | 9 +++++---- .../io/substrait/isthmus/PlanTestBase.java | 20 +++++++++++-------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java index 3c054cee5..1f14d9459 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java @@ -18,12 +18,12 @@ public DdlRoundtripTest() throws SqlParseException { @Test void testCreateTable() throws Exception { String sql = "create table dst1 as select * from src1"; - assertFullRoundTripWorkaroundOptimizer(sql, catalogReader); + assertFullRoundTripWithIdentityProjectionWorkaround(sql, catalogReader); } @Test void testCreateView() throws Exception { String sql = "create view dst1 as select * from src1"; - assertFullRoundTripWorkaroundOptimizer(sql, catalogReader); + assertFullRoundTripWithIdentityProjectionWorkaround(sql, catalogReader); } } diff --git a/isthmus/src/test/java/io/substrait/isthmus/DmlRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/DmlRoundtripTest.java index 5db7db71c..71db7d28f 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DmlRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DmlRoundtripTest.java @@ -16,20 +16,21 @@ public DmlRoundtripTest() throws SqlParseException {} @Test void testDelete() throws SqlParseException { - assertFullRoundTripWorkaroundOptimizer("delete from src1 where intcol=10", catalogReader); + assertFullRoundTripWithIdentityProjectionWorkaround( + "delete from src1 where intcol=10", catalogReader); } @Test void testUpdate() throws SqlParseException { - assertFullRoundTripWorkaroundOptimizer( + assertFullRoundTripWithIdentityProjectionWorkaround( "update src1 set intcol=10 where charcol='a'", catalogReader); } @Test void testInsert() throws SqlParseException { - assertFullRoundTripWorkaroundOptimizer( + assertFullRoundTripWithIdentityProjectionWorkaround( "insert into src1 (intcol, charcol) values (1,'a'); ", catalogReader); - assertFullRoundTripWorkaroundOptimizer( + assertFullRoundTripWithIdentityProjectionWorkaround( "insert into src1 (intcol, charcol) select intcol,charcol from src2;", catalogReader); } } diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index 4c5448de8..aba04f047 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -209,11 +209,12 @@ protected void assertFullRoundTrip(String sqlQuery, Prepare.CatalogReader catalo } /** - * Verifies that the given query can be converted from its Calcite form to the Substrait Proto - * representation and back. Since calcite optimizer (sql->calcite) and calcite RelBuilder might - * use different optimization (plus it is hard to disable optimization for the latter), calcite - * needs to be obtained after processing with RelBuilder. Therefore, a preparation step is - * required + * Verifies that the given query can be converted from its Calcite representation to Substrait + * proto and back. Due to the various ways in which Calcite plans are produced, some plans contain + * identity projections and others do not. Fully removing this behaviour is quite tricky. As a + * workaround, this method prepares the plan such that identity projections are removed. + * + *

In the long-term, we should work to remove this test method. * *

Preparation: * SQL -> Calcite 0 -> Substrait POJO 0 -> Substrait Proto 0 -> Substrait POJO 1 -> Calcite 1 @@ -230,11 +231,12 @@ protected void assertFullRoundTrip(String sqlQuery, Prepare.CatalogReader catalo *

  • Substrait POJO 2 == Substrait POJO 4 * */ - protected void assertFullRoundTripWorkaroundOptimizer( + protected void assertFullRoundTripWithIdentityProjectionWorkaround( String sqlQuery, Prepare.CatalogReader catalogReader) throws SqlParseException { ExtensionCollector extensionCollector = new ExtensionCollector(); - // preparation: SQL -> Calcite 0 + // Preparation + // SQL -> Calcite 0 RelRoot calcite0 = SubstraitSqlToCalcite.convertQuery(sqlQuery, catalogReader); // Calcite 0 -> Substrait POJO 0 @@ -251,9 +253,11 @@ protected void assertFullRoundTripWorkaroundOptimizer( final SubstraitToCalcite substraitToCalcite = new SubstraitToCalcite(extensions, typeFactory, catalogReader); + // Substrait POJO 1 -> Calcite 1 RelRoot calcite1 = substraitToCalcite.convert(root1); - // end of preparation + + // End Preparation // Calcite 1 -> Substrait POJO 2 Plan.Root root2 = SubstraitRelVisitor.convert(calcite1, extensions); From be2e6b2541cdaeadd5642f19997d0ea575689106 Mon Sep 17 00:00:00 2001 From: Anton Zorin Date: Thu, 18 Sep 2025 10:55:39 +0200 Subject: [PATCH 17/17] refactor: migrate to RelNode as input in CreateTable and CreateView --- .../isthmus/SubstraitRelNodeConverter.java | 8 ++------ .../io/substrait/isthmus/SubstraitRelVisitor.java | 12 ++++++------ .../isthmus/calcite/rel/CreateTable.java | 15 +++++++-------- .../substrait/isthmus/calcite/rel/CreateView.java | 15 +++++++-------- .../isthmus/calcite/rel/DdlSqlToRelConverter.java | 5 +++-- 5 files changed, 25 insertions(+), 30 deletions(-) diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java index 9ef4540e9..aa8281fe3 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelNodeConverter.java @@ -58,7 +58,6 @@ import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.JoinRelType; @@ -74,7 +73,6 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexSlot; import org.apache.calcite.sql.SqlAggFunction; -import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.tools.Frameworks; @@ -573,8 +571,7 @@ public RelNode visit(NamedDdl namedDdl, Context context) { Rel viewDefinition = namedDdl.getViewDefinition().get(); RelNode relNode = viewDefinition.accept(this, context); - RelRoot relRoot = RelRoot.of(relNode, SqlKind.SELECT); - return new CreateView(namedDdl.getNames(), relRoot); + return new CreateView(namedDdl.getNames(), relNode); } @Override @@ -627,8 +624,7 @@ private RelNode handleCreateTableAs(NamedWrite namedWrite, Context context) { Rel input = namedWrite.getInput(); RelNode relNode = input.accept(this, context); - RelRoot relRoot = RelRoot.of(relNode, SqlKind.SELECT); - return new CreateTable(namedWrite.getNames(), relRoot); + return new CreateTable(namedWrite.getNames(), relNode); } @Override diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java index 4a86554f3..79449ee9e 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitRelVisitor.java @@ -450,14 +450,14 @@ public Rel visit(TableModify modify) { } } - private NamedStruct getSchema(final RelRoot queryRelRoot) { - final RelDataType rowType = queryRelRoot.rel.getRowType(); + private NamedStruct getSchema(final RelNode queryRelRoot) { + final RelDataType rowType = queryRelRoot.getRowType(); return typeConverter.toNamedStruct(rowType); } public Rel handleCreateTable(CreateTable createTable) { - RelRoot input = createTable.getInput(); - Rel inputRel = apply(input.project()); + RelNode input = createTable.getInput(); + Rel inputRel = apply(input); NamedStruct schema = getSchema(input); return NamedWrite.builder() .input(inputRel) @@ -470,8 +470,8 @@ public Rel handleCreateTable(CreateTable createTable) { } public Rel handleCreateView(CreateView createView) { - RelRoot input = createView.getInput(); - Rel inputRel = apply(input.project()); + RelNode input = createView.getInput(); + Rel inputRel = apply(input); final Expression.StructLiteral defaults = ExpressionCreator.struct(false); diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java index a3e2560eb..66a030b8b 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java @@ -3,17 +3,16 @@ import java.util.List; import org.apache.calcite.rel.AbstractRelNode; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.type.RelDataType; public class CreateTable extends AbstractRelNode { private final List tableName; - private final RelRoot input; + private final RelNode input; - public CreateTable(List tableName, RelRoot input) { - super(input.rel.getCluster(), input.rel.getTraitSet()); + public CreateTable(List tableName, RelNode input) { + super(input.getCluster(), input.getTraitSet()); this.tableName = tableName; this.input = input; @@ -21,24 +20,24 @@ public CreateTable(List tableName, RelRoot input) { @Override protected RelDataType deriveRowType() { - return input.validatedRowType; + return input.getRowType(); } @Override public RelWriter explainTerms(RelWriter pw) { - return super.explainTerms(pw).input("input", getInput().rel).item("tableName", getTableName()); + return super.explainTerms(pw).input("input", getInput()).item("tableName", getTableName()); } @Override public List getInputs() { - return List.of(input.rel); + return List.of(input); } public List getTableName() { return tableName; } - public RelRoot getInput() { + public RelNode getInput() { return input; } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java index 57e8823e0..ef1e228cb 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java @@ -3,40 +3,39 @@ import java.util.List; import org.apache.calcite.rel.AbstractRelNode; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.type.RelDataType; public class CreateView extends AbstractRelNode { private final List viewName; - private final RelRoot input; + private final RelNode input; - public CreateView(List viewName, RelRoot input) { - super(input.rel.getCluster(), input.rel.getTraitSet()); + public CreateView(List viewName, RelNode input) { + super(input.getCluster(), input.getTraitSet()); this.viewName = viewName; this.input = input; } @Override protected RelDataType deriveRowType() { - return input.validatedRowType; + return input.getRowType(); } @Override public RelWriter explainTerms(RelWriter pw) { - return super.explainTerms(pw).input("input", getInput().rel).item("viewName", getViewName()); + return super.explainTerms(pw).input("input", getInput()).item("viewName", getViewName()); } @Override public List getInputs() { - return List.of(input.rel); + return List.of(input); } public List getViewName() { return viewName; } - public RelRoot getInput() { + public RelNode getInput() { return input; } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/DdlSqlToRelConverter.java b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/DdlSqlToRelConverter.java index 4a4702c58..6a237b366 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/DdlSqlToRelConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/DdlSqlToRelConverter.java @@ -3,6 +3,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlNode; @@ -53,12 +54,12 @@ protected RelRoot handleCreateTable(final SqlCreateTable sqlCreateTable) { if (sqlCreateTable.query == null) { throw new IllegalArgumentException("Only create table as select statements are supported"); } - final RelRoot input = converter.convertQuery(sqlCreateTable.query, true, true); + final RelNode input = converter.convertQuery(sqlCreateTable.query, true, true).rel; return RelRoot.of(new CreateTable(sqlCreateTable.name.names, input), sqlCreateTable.getKind()); } protected RelRoot handleCreateView(final SqlCreateView sqlCreateView) { - final RelRoot input = converter.convertQuery(sqlCreateView.query, true, true); + final RelNode input = converter.convertQuery(sqlCreateView.query, true, true).rel; return RelRoot.of(new CreateTable(sqlCreateView.name.names, input), sqlCreateView.getKind()); } }