Skip to content

Commit 8906eb8

Browse files
authored
feat(isthmus): introduce SubstraitSqlToCalcite and SubstraitStatementParser (#474)
1 parent 02e1143 commit 8906eb8

File tree

6 files changed

+228
-87
lines changed

6 files changed

+228
-87
lines changed
Lines changed: 14 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,12 @@
11
package io.substrait.isthmus;
22

3-
import com.google.common.annotations.VisibleForTesting;
4-
import io.substrait.isthmus.sql.SubstraitSqlValidator;
3+
import io.substrait.isthmus.sql.SubstraitSqlToCalcite;
54
import io.substrait.plan.ImmutablePlan.Builder;
65
import io.substrait.plan.Plan;
76
import io.substrait.plan.Plan.Version;
87
import io.substrait.plan.PlanProtoConverter;
9-
import java.util.List;
10-
import org.apache.calcite.plan.hep.HepPlanner;
11-
import org.apache.calcite.plan.hep.HepProgram;
128
import org.apache.calcite.prepare.Prepare;
13-
import org.apache.calcite.rel.RelRoot;
14-
import org.apache.calcite.rel.rules.CoreRules;
15-
import org.apache.calcite.sql.SqlNode;
16-
import org.apache.calcite.sql.SqlNodeList;
179
import org.apache.calcite.sql.parser.SqlParseException;
18-
import org.apache.calcite.sql.parser.SqlParser;
19-
import org.apache.calcite.sql.validate.SqlValidator;
20-
import org.apache.calcite.sql2rel.SqlToRelConverter;
21-
import org.apache.calcite.sql2rel.StandardConvertletTable;
2210

2311
/** Take a SQL statement and a set of table definitions and return a substrait plan. */
2412
public class SqlToSubstrait extends SqlConverterBase {
@@ -32,83 +20,43 @@ public SqlToSubstrait(FeatureBoard features) {
3220
}
3321

3422
/**
35-
* Converts a SQL statements string into a Substrait proto {@link io.substrait.proto.Plan}.
23+
* Converts one or more SQL statements into a Substrait {@link io.substrait.proto.Plan}.
3624
*
37-
* @param sql the SQL statements string containing one more SQL statements
25+
* @param sqlStatements a string containing one more SQL statements
3826
* @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in
39-
* the SQL statements string
40-
* @return the Substrait proto {@link io.substrait.proto.Plan}
27+
* the SQL statements
28+
* @return a Substrait proto {@link io.substrait.proto.Plan}
4129
* @throws SqlParseException if there is an error while parsing the SQL statements string
4230
* @deprecated use {@link #convert(String, org.apache.calcite.prepare.Prepare.CatalogReader)}
4331
* instead to get a {@link Plan} and convert that to a {@link io.substrait.proto.Plan} using
4432
* {@link PlanProtoConverter#toProto(Plan)}
4533
*/
4634
@Deprecated
47-
public io.substrait.proto.Plan execute(String sql, Prepare.CatalogReader catalogReader)
35+
public io.substrait.proto.Plan execute(String sqlStatements, Prepare.CatalogReader catalogReader)
4836
throws SqlParseException {
4937
PlanProtoConverter planToProto = new PlanProtoConverter();
50-
51-
return planToProto.toProto(convert(sql, catalogReader));
38+
return planToProto.toProto(convert(sqlStatements, catalogReader));
5239
}
5340

5441
/**
55-
* Converts a SQL statements string into a Substrait {@link Plan}.
42+
* Converts one or more SQL statements into a Substrait {@link Plan}.
5643
*
57-
* @param sql the SQL statements string containing one more SQL statements
44+
* @param sqlStatements a string containing one more SQL statements
5845
* @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in
59-
* the SQL statements string
46+
* the SQL statements
6047
* @return the Substrait {@link Plan}
61-
* @throws SqlParseException if there is an error while parsing the SQL statements string
48+
* @throws SqlParseException if there is an error while parsing the SQL statements
6249
*/
63-
public Plan convert(String sql, Prepare.CatalogReader catalogReader) throws SqlParseException {
50+
public Plan convert(String sqlStatements, Prepare.CatalogReader catalogReader)
51+
throws SqlParseException {
6452
Builder builder = io.substrait.plan.Plan.builder();
6553
builder.version(Version.builder().from(Version.DEFAULT_VERSION).producer("isthmus").build());
6654

6755
// TODO: consider case in which one sql passes conversion while others don't
68-
sqlToRelNode(sql, catalogReader).stream()
56+
SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader).stream()
6957
.map(root -> SubstraitRelVisitor.convert(root, EXTENSION_COLLECTION, featureBoard))
7058
.forEach(root -> builder.addRoots(root));
7159

7260
return builder.build();
7361
}
74-
75-
@VisibleForTesting
76-
List<RelRoot> sqlToRelNode(String sql, Prepare.CatalogReader catalogReader)
77-
throws SqlParseException {
78-
SqlValidator validator = new SubstraitSqlValidator(catalogReader);
79-
SqlParser parser = SqlParser.create(sql, parserConfig);
80-
SqlNodeList parsedList = parser.parseStmtList();
81-
SqlToRelConverter converter = createSqlToRelConverter(validator, catalogReader);
82-
List<RelRoot> roots =
83-
parsedList.stream()
84-
.map(parsed -> getBestExpRelRoot(converter, parsed))
85-
.collect(java.util.stream.Collectors.toList());
86-
return roots;
87-
}
88-
89-
protected SqlToRelConverter createSqlToRelConverter(
90-
SqlValidator validator, Prepare.CatalogReader catalogReader) {
91-
SqlToRelConverter converter =
92-
new SqlToRelConverter(
93-
null,
94-
validator,
95-
catalogReader,
96-
relOptCluster,
97-
StandardConvertletTable.INSTANCE,
98-
converterConfig);
99-
return converter;
100-
}
101-
102-
protected RelRoot getBestExpRelRoot(SqlToRelConverter converter, SqlNode parsed) {
103-
RelRoot root = converter.convertQuery(parsed, true, true);
104-
{
105-
// RelBuilder seems to implicitly use the rule below,
106-
// need to add to avoid discrepancies in assertFullRoundTrip
107-
HepProgram program = HepProgram.builder().addRuleInstance(CoreRules.PROJECT_REMOVE).build();
108-
HepPlanner hepPlanner = new HepPlanner(program);
109-
hepPlanner.setRoot(root.rel);
110-
root = root.withRel(hepPlanner.findBestExp());
111-
}
112-
return root;
113-
}
11462
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package io.substrait.isthmus.sql;
2+
3+
import java.util.List;
4+
import org.apache.calcite.avatica.util.Casing;
5+
import org.apache.calcite.sql.SqlNode;
6+
import org.apache.calcite.sql.parser.SqlParseException;
7+
import org.apache.calcite.sql.parser.SqlParser;
8+
import org.apache.calcite.sql.validate.SqlConformanceEnum;
9+
10+
/**
11+
* Utility class for parsing SQL statements to {@link SqlNode}s using a Substrait flavoured SQL
12+
* parser. Intended for testing and experimentation.
13+
*/
14+
public class SubstraitSqlStatementParser {
15+
16+
private static final SqlParser.Config PARSER_CONFIG =
17+
SqlParser.config()
18+
// TODO: switch to Casing.UNCHANGED
19+
.withUnquotedCasing(Casing.TO_UPPER)
20+
// use LENIENT conformance to allow for parsing a wide variety of dialects
21+
.withConformance(SqlConformanceEnum.LENIENT);
22+
23+
/**
24+
* Parse one or more SQL statements to a list of {@link SqlNode}s.
25+
*
26+
* @param sqlStatements a string containing one or more SQL statements
27+
* @return a list of {@link SqlNode}s corresponding to the given statements
28+
* @throws SqlParseException if there is an error while parsing the SQL statements
29+
*/
30+
public static List<SqlNode> parseStatements(String sqlStatements) throws SqlParseException {
31+
SqlParser parser = SqlParser.create(sqlStatements, PARSER_CONFIG);
32+
return parser.parseStmtList();
33+
}
34+
}
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
package io.substrait.isthmus.sql;
2+
3+
import io.substrait.isthmus.SubstraitTypeSystem;
4+
import java.util.List;
5+
import java.util.stream.Collectors;
6+
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
7+
import org.apache.calcite.plan.RelOptCluster;
8+
import org.apache.calcite.plan.RelOptPlanner;
9+
import org.apache.calcite.plan.RelOptTable;
10+
import org.apache.calcite.plan.hep.HepPlanner;
11+
import org.apache.calcite.plan.hep.HepProgram;
12+
import org.apache.calcite.prepare.Prepare;
13+
import org.apache.calcite.rel.RelNode;
14+
import org.apache.calcite.rel.RelRoot;
15+
import org.apache.calcite.rel.rules.CoreRules;
16+
import org.apache.calcite.rex.RexBuilder;
17+
import org.apache.calcite.sql.SqlNode;
18+
import org.apache.calcite.sql.parser.SqlParseException;
19+
import org.apache.calcite.sql.validate.SqlValidator;
20+
import org.apache.calcite.sql2rel.SqlToRelConverter;
21+
import org.apache.calcite.sql2rel.StandardConvertletTable;
22+
23+
/**
24+
* Substrait flavoured SQL processor provided as a utility for testing and experimentation,
25+
* utilizing {@link SubstraitSqlStatementParser} and {@link SubstraitSqlValidator}
26+
*/
27+
public class SubstraitSqlToCalcite {
28+
29+
/**
30+
* Converts a SQL statement to a Calcite {@link RelRoot}.
31+
*
32+
* @param sqlStatement a SQL statement string
33+
* @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in
34+
* the SQL statement
35+
* @return a {@link RelRoot} corresponding to the given SQL statement
36+
* @throws SqlParseException if there is an error while parsing the SQL statement
37+
*/
38+
public static RelRoot convertQuery(String sqlStatement, Prepare.CatalogReader catalogReader)
39+
throws SqlParseException {
40+
SqlValidator validator = new SubstraitSqlValidator(catalogReader);
41+
return convertQuery(sqlStatement, catalogReader, validator, createDefaultRelOptCluster());
42+
}
43+
44+
/**
45+
* Converts a SQL statement to a Calcite {@link RelRoot}.
46+
*
47+
* @param sqlStatement a SQL statement
48+
* @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in
49+
* the SQL statement
50+
* @param validator the {@link SqlValidator} used to validate the SQL statement. Allows for
51+
* additional control of SQL functions and operators via {@link
52+
* SqlValidator#getOperatorTable()}
53+
* @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement
54+
* processing. Calcite expects that the {@link RelOptCluster} used during statement processing
55+
* is the same as that used during query optimization.
56+
* @return {@link RelRoot} corresponding to the given SQL statement
57+
* @throws SqlParseException if there is an error while parsing the SQL statement string
58+
*/
59+
public static RelRoot convertQuery(
60+
String sqlStatement,
61+
Prepare.CatalogReader catalogReader,
62+
SqlValidator validator,
63+
RelOptCluster cluster)
64+
throws SqlParseException {
65+
List<SqlNode> sqlNodes = SubstraitSqlStatementParser.parseStatements(sqlStatement);
66+
if (sqlNodes.size() != 1) {
67+
throw new IllegalArgumentException(
68+
String.format("Expected one statement, found: %d", sqlNodes.size()));
69+
}
70+
List<RelRoot> relRoots = convert(sqlNodes, catalogReader, validator, cluster);
71+
// as there was only 1 statement, there should only be 1 root
72+
return relRoots.get(0);
73+
}
74+
75+
/**
76+
* Converts one or more SQL statements to a List of {@link RelRoot}, with one {@link RelRoot} per
77+
* statement.
78+
*
79+
* @param sqlStatements a string containing one or more SQL statements
80+
* @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in
81+
* the SQL statements
82+
* @return a list of {@link RelRoot}s corresponding to the given SQL statements
83+
* @throws SqlParseException if there is an error while parsing the SQL statements
84+
*/
85+
public static List<RelRoot> convertQueries(
86+
String sqlStatements, Prepare.CatalogReader catalogReader) throws SqlParseException {
87+
SqlValidator validator = new SubstraitSqlValidator(catalogReader);
88+
return convertQueries(sqlStatements, catalogReader, validator, createDefaultRelOptCluster());
89+
}
90+
91+
/**
92+
* Converts one or more SQL statements to a List of {@link RelRoot}, with one {@link RelRoot} per
93+
* statement.
94+
*
95+
* @param sqlStatements a string containing one or more SQL statements
96+
* @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in
97+
* the SQL statements
98+
* @param validator the {@link SqlValidator} used to validate SQL statements. Allows for
99+
* additional control of SQL functions and operators via {@link
100+
* SqlValidator#getOperatorTable()}
101+
* @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement
102+
* processing. Calcite expects that the {@link RelOptCluster} used during statement processing
103+
* is the same as that used during query optimization.
104+
* @return a list of {@link RelRoot}s corresponding to the given SQL statements
105+
* @throws SqlParseException if there is an error while parsing the SQL statements
106+
*/
107+
public static List<RelRoot> convertQueries(
108+
String sqlStatements,
109+
Prepare.CatalogReader catalogReader,
110+
SqlValidator validator,
111+
RelOptCluster cluster)
112+
throws SqlParseException {
113+
List<SqlNode> sqlNodes = SubstraitSqlStatementParser.parseStatements(sqlStatements);
114+
return convert(sqlNodes, catalogReader, validator, cluster);
115+
}
116+
117+
static List<RelRoot> convert(
118+
List<SqlNode> sqlNodes,
119+
Prepare.CatalogReader catalogReader,
120+
SqlValidator validator,
121+
RelOptCluster cluster) {
122+
RelOptTable.ViewExpander viewExpander = null;
123+
SqlToRelConverter converter =
124+
new SqlToRelConverter(
125+
viewExpander,
126+
validator,
127+
catalogReader,
128+
cluster,
129+
StandardConvertletTable.INSTANCE,
130+
SqlToRelConverter.CONFIG);
131+
// apply validation
132+
boolean needsValidation = true;
133+
// query is the root of the tree
134+
boolean top = true;
135+
return sqlNodes.stream()
136+
.map(
137+
sqlNode ->
138+
removeRedundantProjects(converter.convertQuery(sqlNode, needsValidation, top)))
139+
.collect(Collectors.toList());
140+
}
141+
142+
static RelOptCluster createDefaultRelOptCluster() {
143+
RexBuilder rexBuilder =
144+
new RexBuilder(new JavaTypeFactoryImpl(SubstraitTypeSystem.TYPE_SYSTEM));
145+
HepProgram program = HepProgram.builder().build();
146+
RelOptPlanner emptyPlanner = new HepPlanner(program);
147+
return RelOptCluster.create(emptyPlanner, rexBuilder);
148+
}
149+
150+
static RelRoot removeRedundantProjects(RelRoot root) {
151+
return root.withRel(removeRedundantProjects(root.rel));
152+
}
153+
154+
static RelNode removeRedundantProjects(RelNode root) {
155+
// The Calcite RelBuilder, when constructing Project that does not modify its inputs in any way,
156+
// simply elides it. The PROJECT_REMOVE rule can be used to remove such projects from Rel trees.
157+
// This facilitates roundtrip testing.
158+
HepProgram program = HepProgram.builder().addRuleInstance(CoreRules.PROJECT_REMOVE).build();
159+
HepPlanner planner = new HepPlanner(program);
160+
planner.setRoot(root);
161+
return planner.findBestExp();
162+
}
163+
}

isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import static org.junit.jupiter.api.Assertions.assertEquals;
55

66
import io.substrait.isthmus.sql.SubstraitCreateStatementParser;
7+
import io.substrait.isthmus.sql.SubstraitSqlToCalcite;
78
import io.substrait.plan.Plan;
89
import io.substrait.relation.NamedScan;
910
import java.util.List;
@@ -18,17 +19,18 @@ void preserveNamesFromSql() throws Exception {
1819
CalciteCatalogReader catalogReader =
1920
SubstraitCreateStatementParser.processCreateStatementsToCatalog(createStatement);
2021

21-
SqlToSubstrait s = new SqlToSubstrait();
2222
SubstraitToCalcite substraitToCalcite =
2323
new SubstraitToCalcite(EXTENSION_COLLECTION, typeFactory);
2424

2525
String query = "SELECT \"a\", \"B\" FROM foo GROUP BY a, b";
2626
List<String> expectedNames = List.of("a", "B");
2727

28-
Plan plan = s.convert(query, catalogReader);
29-
assertEquals(1, plan.getRoots().size());
28+
org.apache.calcite.rel.RelRoot calciteRelRoot1 =
29+
SubstraitSqlToCalcite.convertQuery(query, catalogReader);
30+
assertEquals(expectedNames, calciteRelRoot1.validatedRowType.getFieldNames());
3031

31-
io.substrait.plan.Plan.Root substraitRelRoot = plan.getRoots().get(0);
32+
io.substrait.plan.Plan.Root substraitRelRoot =
33+
SubstraitRelVisitor.convert(calciteRelRoot1, EXTENSION_COLLECTION);
3234
assertEquals(expectedNames, substraitRelRoot.getNames());
3335

3436
org.apache.calcite.rel.RelRoot calciteRelRoot2 = substraitToCalcite.convert(substraitRelRoot);

isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22

33
import static io.substrait.isthmus.SqlConverterBase.EXTENSION_COLLECTION;
44
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
5-
import static org.junit.jupiter.api.Assertions.assertEquals;
65

6+
import io.substrait.isthmus.sql.SubstraitSqlToCalcite;
77
import java.io.IOException;
8-
import java.util.List;
98
import org.apache.calcite.plan.hep.HepPlanner;
109
import org.apache.calcite.plan.hep.HepProgram;
1110
import org.apache.calcite.plan.hep.HepProgramBuilder;
@@ -24,11 +23,8 @@ void conversionHandlesBuiltInSum0CallAddedByRule() throws SqlParseException, IOE
2423
// verify that the query works generally
2524
assertFullRoundTrip(query);
2625

27-
SqlToSubstrait sqlConverter = new SqlToSubstrait();
28-
List<RelRoot> relRoots = sqlConverter.sqlToRelNode(query, TPCH_CATALOG);
29-
assertEquals(1, relRoots.size());
30-
RelRoot planRoot = relRoots.get(0);
31-
RelNode originalPlan = planRoot.rel;
26+
RelRoot relRoot = SubstraitSqlToCalcite.convertQuery(query, TPCH_CATALOG);
27+
RelNode originalPlan = relRoot.rel;
3228

3329
// Create a program to apply the AGGREGATE_EXPAND_DISTINCT_AGGREGATES_TO_JOIN rule.
3430
// This will introduce a SqlSumEmptyIsZeroAggFunction to the plan.
@@ -46,6 +42,6 @@ void conversionHandlesBuiltInSum0CallAddedByRule() throws SqlParseException, IOE
4642
assertDoesNotThrow(
4743
() ->
4844
// Conversion of the new plan should succeed
49-
SubstraitRelVisitor.convert(RelRoot.of(newPlan, planRoot.kind), EXTENSION_COLLECTION));
45+
SubstraitRelVisitor.convert(RelRoot.of(newPlan, relRoot.kind), EXTENSION_COLLECTION));
5046
}
5147
}

0 commit comments

Comments
 (0)