From b7903b70f9d840cb62a7340014953d3800ce13c9 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Wed, 23 Apr 2025 17:11:53 +0200 Subject: [PATCH 1/4] Have CombineProjections propate references upwards This will have CombineProjections allow references from the "under" plan be kept when merging stacked Projections. This is to prevent field attributes that are dropped by ReplaceMissingFieldWithNull be used in the resulting plan. --- .../xpack/esql/core/expression/Alias.java | 3 +- .../rules/logical/CombineProjections.java | 3 ++ .../local/ReplaceMissingFieldWithNull.java | 4 +- .../LocalLogicalPlanOptimizerTests.java | 43 +++++++++++++++++-- .../LocalPhysicalPlanOptimizerTests.java | 19 ++++---- 5 files changed, 56 insertions(+), 16 deletions(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java index 1f7d03ba9d905..7195fdfb933a1 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java @@ -93,7 +93,8 @@ protected NodeInfo info() { } public Alias replaceChild(Expression child) { - return new Alias(source(), name(), child, id(), synthetic()); + // these "nop" replacements can occur on attribute map resolutions having the default return same as the lookup key + return child == this.child ? this : new Alias(source(), name(), child, id(), synthetic()); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java index eaccf3155cde5..e9ca958c5e979 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java @@ -15,6 +15,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; @@ -126,6 +127,8 @@ private static List combineProjections(List replaced = new ArrayList<>(upper.size()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index 236d0863f7ff9..504ebb84d5ec7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -58,7 +58,7 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog // Do not use the attribute name, this can deviate from the field name for union types; use fieldName() instead. // Also retain fields from lookup indices because we do not have stats for these. Predicate shouldBeRetained = f -> f.field() instanceof PotentiallyUnmappedKeywordEsField - || (localLogicalOptimizerContext.searchStats().exists(f.fieldName()) || lookupFields.contains(f)); + || localLogicalOptimizerContext.searchStats().exists(f.fieldName()) || lookupFields.contains(f); return plan.transformUp(p -> missingToNull(p, shouldBeRetained)); } @@ -77,7 +77,7 @@ private LogicalPlan missingToNull(LogicalPlan plan, Predicate sh for (int i = 0, size = relationOutput.size(); i < size; i++) { Attribute attr = relationOutput.get(i); NamedExpression projection; - if (attr instanceof FieldAttribute f && (shouldBeRetained.test(f) == false)) { + if (attr instanceof FieldAttribute f && shouldBeRetained.test(f) == false) { DataType dt = f.dataType(); Alias nullAlias = nullLiterals.get(dt); // save the first field as null (per datatype) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java index ea95152d038ac..0d806771787dd 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.optimizer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.util.Maps; import org.elasticsearch.index.IndexMode; @@ -22,6 +23,7 @@ import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -149,10 +151,10 @@ public void testMissingFieldInFilterString() { /** * Expects - * Project[[last_name{f}#6]] + * Project[[last_name{r}#7]] * \_Eval[[null[KEYWORD] AS last_name]] - * \_Limit[10000[INTEGER]] - * \_EsRelation[test][_meta_field{f}#8, emp_no{f}#2, first_name{f}#3, gen..] + * \_Limit[1000[INTEGER],false] + * \_EsRelation[test][_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gen..] */ public void testMissingFieldInProject() { var plan = plan(""" @@ -166,7 +168,7 @@ public void testMissingFieldInProject() { var project = as(localPlan, Project.class); var projections = project.projections(); assertThat(Expressions.names(projections), contains("last_name")); - as(projections.get(0), FieldAttribute.class); + as(projections.get(0), ReferenceAttribute.class); var eval = as(project.child(), Eval.class); assertThat(Expressions.names(eval.fields()), contains("last_name")); var alias = as(eval.fields().get(0), Alias.class); @@ -179,6 +181,39 @@ public void testMissingFieldInProject() { assertThat(Expressions.names(source.output()), not(contains("last_name"))); } + /* + * Expects a similar plan to testMissingFieldInProject() above, except for the Alias's child value + * Project[[last_name{r}#4]] + * \_Eval[[[66 6f 6f][KEYWORD] AS last_name]] + * \_Limit[1000[INTEGER],false] + * \_EsRelation[test][_meta_field{f}#11, emp_no{f}#5, first_name{f}#6, ge..] + */ + public void testReassignedMissingFieldInProject() { + var plan = plan(""" + from test + | keep last_name + | eval last_name = "foo" + """); + + var testStats = statsForMissingField("last_name"); + var localPlan = localPlan(plan, testStats); + + var project = as(localPlan, Project.class); + var projections = project.projections(); + assertThat(Expressions.names(projections), contains("last_name")); + as(projections.get(0), ReferenceAttribute.class); + var eval = as(project.child(), Eval.class); + assertThat(Expressions.names(eval.fields()), contains("last_name")); + var alias = as(eval.fields().get(0), Alias.class); + var literal = as(alias.child(), Literal.class); + assertThat(literal.value(), is(new BytesRef("foo"))); + assertThat(literal.dataType(), is(DataType.KEYWORD)); + + var limit = as(eval.child(), Limit.class); + var source = as(limit.child(), EsRelation.class); + assertThat(Expressions.names(source.output()), not(contains("last_name"))); + } + /** * Expects * EsqlProject[[first_name{f}#4]] diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index ddbecf52a3f12..9c54a233e016f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -39,8 +39,8 @@ import org.elasticsearch.xpack.esql.analysis.Verifier; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Expressions; -import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; @@ -1307,15 +1307,16 @@ private EsQueryExec doTestOutOfRangeFilterPushdown(String query, Analyzer analyz return luceneQuery; } - /** + /* * Expects * LimitExec[1000[INTEGER]] - * \_ExchangeExec[[],false] - * \_ProjectExec[[_meta_field{f}#8, emp_no{r}#2, first_name{r}#3, gender{f}#4, job{f}#9, job.raw{f}#10, languages{f}#5, first_n - * ame{r}#3 AS last_name, long_noidx{f}#11, emp_no{r}#2 AS salary]] - * \_FieldExtractExec[_meta_field{f}#8, gender{f}#4, job{f}#9, job.raw{f}..] + * \_ExchangeExec[[_meta_field{f}#8, emp_no{f}#2, first_name{f}#3, gender{f}#4, hire_date{f}#9, job{f}#10, job.raw{f}#11, langua + * ges{f}#5, last_name{f}#6, long_noidx{f}#12, salary{f}#7],false] + * \_ProjectExec[[_meta_field{f}#8, emp_no{r}#2, first_name{r}#3, gender{f}#4, hire_date{f}#9, job{f}#10, job.raw{f}#11, langua + * ges{f}#5, first_name{r}#3 AS last_name, long_noidx{f}#12, emp_no{r}#2 AS salary]] + * \_FieldExtractExec[_meta_field{f}#8, gender{f}#4, hire_date{f}#9, job{..]<[],[]> * \_EvalExec[[null[INTEGER] AS emp_no, null[KEYWORD] AS first_name]] - * \_EsQueryExec[test], query[][_doc{f}#12], limit[1000], sort[] estimatedRowSize[270] + * \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#13], limit[1000], sort[] estimatedRowSize[278] */ public void testMissingFieldsDoNotGetExtracted() { var stats = EsqlTestUtils.statsForMissingField("first_name", "last_name", "emp_no", "salary"); @@ -1342,9 +1343,9 @@ public void testMissingFieldsDoNotGetExtracted() { ) ); // emp_no - assertThat(projections.get(1), instanceOf(FieldAttribute.class)); + assertThat(projections.get(1), instanceOf(ReferenceAttribute.class)); // first_name - assertThat(projections.get(2), instanceOf(FieldAttribute.class)); + assertThat(projections.get(2), instanceOf(ReferenceAttribute.class)); // last_name --> first_name var nullAlias = Alias.unwrap(projections.get(8)); From 461c755ea85365cfdefc16c35be86ee8c919041f Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Wed, 23 Apr 2025 17:23:16 +0200 Subject: [PATCH 2/4] Update docs/changelog/127264.yaml --- docs/changelog/127264.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/127264.yaml diff --git a/docs/changelog/127264.yaml b/docs/changelog/127264.yaml new file mode 100644 index 0000000000000..6080903b9f6a4 --- /dev/null +++ b/docs/changelog/127264.yaml @@ -0,0 +1,5 @@ +pr: 127264 +summary: Have `CombineProjections` propagate references upwards +area: Compute Engine +type: bug +issues: [] From 6c1c5bd328b58cada2e2dfed6c417f0d984ba5c6 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 23 Apr 2025 15:30:06 +0000 Subject: [PATCH 3/4] [CI] Auto commit changes from spotless --- .../rules/logical/local/ReplaceMissingFieldWithNull.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index 504ebb84d5ec7..0d92c5830ff8b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -58,7 +58,8 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog // Do not use the attribute name, this can deviate from the field name for union types; use fieldName() instead. // Also retain fields from lookup indices because we do not have stats for these. Predicate shouldBeRetained = f -> f.field() instanceof PotentiallyUnmappedKeywordEsField - || localLogicalOptimizerContext.searchStats().exists(f.fieldName()) || lookupFields.contains(f); + || localLogicalOptimizerContext.searchStats().exists(f.fieldName()) + || lookupFields.contains(f); return plan.transformUp(p -> missingToNull(p, shouldBeRetained)); } From d1dbe3b262607e8fb283c8c6a82c107442c32feb Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Wed, 23 Apr 2025 17:31:26 +0200 Subject: [PATCH 4/4] Delete docs/changelog/127264.yaml --- docs/changelog/127264.yaml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 docs/changelog/127264.yaml diff --git a/docs/changelog/127264.yaml b/docs/changelog/127264.yaml deleted file mode 100644 index 6080903b9f6a4..0000000000000 --- a/docs/changelog/127264.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 127264 -summary: Have `CombineProjections` propagate references upwards -area: Compute Engine -type: bug -issues: []