Skip to content

Commit 55e620a

Browse files
committed
RFC-0005 Phase 2, annotated string functions and added tests.
1. Annotated the scalar functions in `StringFunctions` class, with `ScalarFunctionConstantStats` and `ScalarPropagateSourceStats` . 2. Added appropriate tests to check if the stats propagation works as expected. 3. Updated AbstractCostBasedPlanTests to generate plans with this feature on and off.
1 parent a9f04a8 commit 55e620a

File tree

8 files changed

+493
-72
lines changed

8 files changed

+493
-72
lines changed

presto-benchto-benchmarks/src/test/java/com/facebook/presto/sql/planner/AbstractCostBasedPlanTest.java

Lines changed: 43 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import com.facebook.presto.tpch.TpchTableHandle;
3030
import com.google.common.base.Strings;
3131
import com.google.common.base.VerifyException;
32+
import com.google.common.collect.ImmutableMap;
3233
import com.google.common.io.Resources;
3334
import org.testng.annotations.DataProvider;
3435
import org.testng.annotations.Test;
@@ -37,9 +38,11 @@
3738
import java.io.UncheckedIOException;
3839
import java.nio.file.Path;
3940
import java.nio.file.Paths;
41+
import java.util.Map;
4042
import java.util.stream.Stream;
4143

4244
import static com.facebook.presto.SystemSessionProperties.OPTIMIZER_USE_HISTOGRAMS;
45+
import static com.facebook.presto.SystemSessionProperties.SCALAR_FUNCTION_STATS_PROPAGATION_ENABLED;
4346
import static com.facebook.presto.spi.plan.JoinDistributionType.REPLICATED;
4447
import static com.facebook.presto.spi.plan.JoinType.INNER;
4548
import static com.facebook.presto.sql.Optimizer.PlanStage.OPTIMIZED_AND_VALIDATED;
@@ -59,6 +62,10 @@
5962
public abstract class AbstractCostBasedPlanTest
6063
extends BasePlanTest
6164
{
65+
private final Map<String, String> featureToOutputDir =
66+
ImmutableMap.of(OPTIMIZER_USE_HISTOGRAMS, "histogram",
67+
SCALAR_FUNCTION_STATS_PROPAGATION_ENABLED, "scalar_function_stats_propagation");
68+
6269
public AbstractCostBasedPlanTest(LocalQueryRunnerSupplier supplier)
6370
{
6471
super(supplier);
@@ -80,19 +87,21 @@ public void test(String queryResourcePath)
8087
}
8188

8289
@Test(dataProvider = "getQueriesDataProvider")
83-
public void histogramsPlansMatch(String queryResourcePath)
90+
public void featureSpecificPlansMatch(String queryResourcePath)
8491
{
8592
String sql = read(queryResourcePath);
86-
Session histogramSession = Session.builder(getQueryRunner().getDefaultSession())
87-
.setSystemProperty(OPTIMIZER_USE_HISTOGRAMS, "true")
88-
.build();
89-
Session noHistogramSession = Session.builder(getQueryRunner().getDefaultSession())
90-
.setSystemProperty(OPTIMIZER_USE_HISTOGRAMS, "false")
91-
.build();
92-
String regularPlan = generateQueryPlan(sql, noHistogramSession);
93-
String histogramPlan = generateQueryPlan(sql, histogramSession);
94-
if (!regularPlan.equals(histogramPlan)) {
95-
assertEquals(histogramPlan, read(getHistogramPlanResourcePath(getQueryPlanResourcePath(queryResourcePath))));
93+
for (Map.Entry<String, String> featureEntry : featureToOutputDir.entrySet()) {
94+
Session featureEnabledSession = Session.builder(getQueryRunner().getDefaultSession())
95+
.setSystemProperty(featureEntry.getKey(), "true")
96+
.build();
97+
Session featureDisabledSession = Session.builder(getQueryRunner().getDefaultSession())
98+
.setSystemProperty(featureEntry.getKey(), "false")
99+
.build();
100+
String regularPlan = generateQueryPlan(sql, featureDisabledSession);
101+
String featureEnabledPlan = generateQueryPlan(sql, featureEnabledSession);
102+
if (!regularPlan.equals(featureEnabledPlan)) {
103+
assertEquals(featureEnabledPlan, read(getSpecificPlanResourcePath(featureEntry.getValue(), getQueryPlanResourcePath(queryResourcePath))));
104+
}
96105
}
97106
}
98107

@@ -101,10 +110,10 @@ private String getQueryPlanResourcePath(String queryResourcePath)
101110
return queryResourcePath.replaceAll("\\.sql$", ".plan.txt");
102111
}
103112

104-
private String getHistogramPlanResourcePath(String regularPlanResourcePath)
113+
private String getSpecificPlanResourcePath(String outDirPath, String regularPlanResourcePath)
105114
{
106115
Path root = Paths.get(regularPlanResourcePath);
107-
return root.getParent().resolve("histogram/" + root.getFileName()).toString();
116+
return root.getParent().resolve(String.format("%s/%s", outDirPath, root.getFileName())).toString();
108117
}
109118

110119
private Path getResourceWritePath(String queryResourcePath)
@@ -124,25 +133,28 @@ public void generate()
124133
.parallel()
125134
.forEach(queryResourcePath -> {
126135
try {
127-
Path queryPlanWritePath = getResourceWritePath(queryResourcePath);
128-
createParentDirs(queryPlanWritePath.toFile());
129-
Session histogramSession = Session.builder(getQueryRunner().getDefaultSession())
130-
.setSystemProperty(OPTIMIZER_USE_HISTOGRAMS, "true")
131-
.build();
132-
Session noHistogramSession = Session.builder(getQueryRunner().getDefaultSession())
133-
.setSystemProperty(OPTIMIZER_USE_HISTOGRAMS, "false")
134-
.build();
135-
String sql = read(queryResourcePath);
136-
String regularPlan = generateQueryPlan(sql, noHistogramSession);
137-
String histogramPlan = generateQueryPlan(sql, histogramSession);
138-
write(regularPlan.getBytes(UTF_8), queryPlanWritePath.toFile());
139-
// write out the histogram plan if it differs
140-
if (!regularPlan.equals(histogramPlan)) {
141-
Path histogramPlanWritePath = getResourceWritePath(getHistogramPlanResourcePath(queryResourcePath));
142-
createParentDirs(histogramPlanWritePath.toFile());
143-
write(histogramPlan.getBytes(UTF_8), histogramPlanWritePath.toFile());
136+
for (Map.Entry<String, String> featureEntry : featureToOutputDir.entrySet()) {
137+
Path queryPlanWritePath = getResourceWritePath(queryResourcePath);
138+
createParentDirs(queryPlanWritePath.toFile());
139+
String sql = read(queryResourcePath);
140+
Session featuredisabledSession = Session.builder(getQueryRunner().getDefaultSession())
141+
.setSystemProperty(featureEntry.getKey(), "false")
142+
.build();
143+
String regularPlan = generateQueryPlan(sql, featuredisabledSession);
144+
Session featureEnabledSession = Session.builder(getQueryRunner().getDefaultSession())
145+
.setSystemProperty(featureEntry.getKey(), "true")
146+
.build();
147+
148+
String featureEnabledPlan = generateQueryPlan(sql, featureEnabledSession);
149+
write(regularPlan.getBytes(UTF_8), queryPlanWritePath.toFile());
150+
// write out the feature enabled plan if it differs
151+
if (!regularPlan.equals(featureEnabledPlan)) {
152+
Path featureEnabledPlanWritePath = getResourceWritePath(getSpecificPlanResourcePath(featureEntry.getValue(), queryResourcePath));
153+
createParentDirs(featureEnabledPlanWritePath.toFile());
154+
write(featureEnabledPlan.getBytes(UTF_8), featureEnabledPlanWritePath.toFile());
155+
}
156+
System.out.println("Generated expected plan for query: " + queryResourcePath);
144157
}
145-
System.out.println("Generated expected plan for query: " + queryResourcePath);
146158
}
147159
catch (IOException e) {
148160
throw new UncheckedIOException(e);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
remote exchange (GATHER, SINGLE, [])
2+
cross join:
3+
final aggregation over (c_first_name, c_last_name, s_store_name)
4+
local exchange (GATHER, SINGLE, [])
5+
remote exchange (REPARTITION, HASH, [c_first_name, c_last_name, s_store_name])
6+
partial aggregation over (c_first_name, c_last_name, s_store_name)
7+
final aggregation over (c_first_name, c_last_name, ca_state, i_color, i_current_price, i_manager_id, i_size, i_units, s_state, s_store_name)
8+
local exchange (GATHER, SINGLE, [])
9+
remote exchange (REPARTITION, HASH, [c_first_name, c_last_name, ca_state, i_color, i_current_price, i_manager_id, i_size, i_units, s_state, s_store_name])
10+
partial aggregation over (c_first_name, c_last_name, ca_state, i_color, i_current_price, i_manager_id, i_size, i_units, s_state, s_store_name)
11+
join (INNER, REPLICATED):
12+
join (INNER, PARTITIONED):
13+
remote exchange (REPARTITION, HASH, [ss_customer_sk])
14+
join (INNER, PARTITIONED):
15+
remote exchange (REPARTITION, HASH, [sr_item_sk, sr_ticket_number])
16+
scan store_returns
17+
local exchange (GATHER, SINGLE, [])
18+
remote exchange (REPARTITION, HASH, [i_item_sk, ss_ticket_number])
19+
join (INNER, REPLICATED):
20+
scan store_sales
21+
local exchange (GATHER, SINGLE, [])
22+
remote exchange (REPLICATE, BROADCAST, [])
23+
scan item
24+
local exchange (GATHER, SINGLE, [])
25+
remote exchange (REPARTITION, HASH, [c_customer_sk])
26+
scan customer
27+
local exchange (GATHER, SINGLE, [])
28+
remote exchange (REPLICATE, BROADCAST, [])
29+
join (INNER, REPLICATED):
30+
scan customer_address
31+
local exchange (GATHER, SINGLE, [])
32+
remote exchange (REPLICATE, BROADCAST, [])
33+
scan store
34+
local exchange (GATHER, SINGLE, [])
35+
remote exchange (REPLICATE, BROADCAST, [])
36+
final aggregation over ()
37+
local exchange (GATHER, SINGLE, [])
38+
remote exchange (GATHER, SINGLE, [])
39+
partial aggregation over ()
40+
final aggregation over (c_first_name_181, c_last_name_182, ca_state_199, i_color_168, i_current_price_156, i_manager_id_171, i_size_166, i_units_169, s_state_146, s_store_name_127)
41+
local exchange (GATHER, SINGLE, [])
42+
remote exchange (REPARTITION, HASH, [c_first_name_181, c_last_name_182, ca_state_199, i_color_168, i_current_price_156, i_manager_id_171, i_size_166, i_units_169, s_state_146, s_store_name_127])
43+
partial aggregation over (c_first_name_181, c_last_name_182, ca_state_199, i_color_168, i_current_price_156, i_manager_id_171, i_size_166, i_units_169, s_state_146, s_store_name_127)
44+
join (INNER, PARTITIONED):
45+
remote exchange (REPARTITION, HASH, [c_birth_country_187, s_zip_147])
46+
join (INNER, REPLICATED):
47+
join (INNER, PARTITIONED):
48+
remote exchange (REPARTITION, HASH, [ss_customer_sk_82])
49+
join (INNER, PARTITIONED):
50+
remote exchange (REPARTITION, HASH, [ss_item_sk_81, ss_ticket_number_88])
51+
join (INNER, REPLICATED):
52+
scan store_sales
53+
local exchange (GATHER, SINGLE, [])
54+
remote exchange (REPLICATE, BROADCAST, [])
55+
scan store
56+
local exchange (GATHER, SINGLE, [])
57+
remote exchange (REPARTITION, HASH, [sr_item_sk_104, sr_ticket_number_111])
58+
scan store_returns
59+
local exchange (GATHER, SINGLE, [])
60+
remote exchange (REPARTITION, HASH, [c_customer_sk_173])
61+
scan customer
62+
local exchange (GATHER, SINGLE, [])
63+
remote exchange (REPLICATE, BROADCAST, [])
64+
scan item
65+
local exchange (GATHER, SINGLE, [])
66+
remote exchange (REPARTITION, HASH, [ca_zip_200, upper_302])
67+
scan customer_address

0 commit comments

Comments
 (0)