elastic · elasticsearchmachine · Apr 3, 2025 · Apr 3, 2025 · Apr 3, 2025
diff --git a/docs/changelog/125764.yaml b/docs/changelog/125764.yaml
@@ -0,0 +1,8 @@
+pr: 125764
+summary: Fix `ReplaceMissingFieldsWithNull`
+area: ES|QL
+type: bug
+issues:
+ - 126036
+ - 121754
+ - 126030
diff --git a/...plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/...plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java
@@ -64,6 +64,7 @@
 import org.elasticsearch.xpack.esql.parser.QueryParam;
 import org.elasticsearch.xpack.esql.plan.logical.Enrich;
 import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
+import org.elasticsearch.xpack.esql.plan.logical.Limit;
 import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
 import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation;
 import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier;
@@ -132,6 +133,7 @@
 import static org.elasticsearch.xpack.esql.parser.ParserUtils.ParamClassification.PATTERN;
 import static org.elasticsearch.xpack.esql.parser.ParserUtils.ParamClassification.VALUE;
 import static org.hamcrest.Matchers.instanceOf;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 
@@ -393,6 +395,14 @@ public static <T> T as(Object node, Class<T> type) {
         return type.cast(node);
     }
 
+    public static Limit asLimit(Object node, Integer limitLiteral) {
+        Limit limit = as(node, Limit.class);
+        if (limitLiteral != null) {
+            assertEquals(as(limit.limit(), Literal.class).value(), limitLiteral);
+        }
+        return limit;
+    }
+
     public static Map<String, EsField> loadMapping(String name) {
         return LoadMapping.loadMapping(name);
     }

diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec
@@ -644,3 +644,21 @@ FROM airports
 abbrev:k  |  city_name:k  |  city_location:geo_point |  country:k  |  location:geo_point                       |  name:text                    |  region:text  |  boundary_wkt_length:i
 IDR       |  Indore       |  POINT(75.8472 22.7167)  |  India      |  POINT(75.8092915005895 22.727749187571)  |  Devi Ahilyabai Holkar Int'l  |  Indore City  |  231
 ;
+
+// Regression test for https://github.com/elastic/elasticsearch/issues/126030
+// We had wrong layouts from ReplaceMissingFieldsWithNull in case of indices that had relevant fields for the query,
+// but were **missing the field we enrich on**.
+fieldsInOtherIndicesBug
+required_capability: enrich_load
+required_capability: fix_replace_missing_field_with_null_duplicate_name_id_in_layout
+
+from *
+| keep author.keyword, book_no, scalerank, street, bytes_in, @timestamp, abbrev, city_location, distance, description, birth_date, language_code, intersects, client_ip, event_duration, version
+| enrich languages_policy on author.keyword
+| sort book_no
+| limit 1
+;
+
+author.keyword:keyword|book_no:keyword|scalerank:integer|street:keyword|bytes_in:ul|@timestamp:null|abbrev:keyword|city_location:geo_point|distance:double|description:null|birth_date:date|language_code:keyword|intersects:boolean|client_ip:null|event_duration:long|version:version|language_name:keyword
+Fyodor Dostoevsky     |1211           |null             |null          |null       |null           |null          |null                   |null           |null            |null           |null                 |null              |null          |null               |null           |null
+;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
@@ -524,7 +524,12 @@ public enum Cap {
         /**
          * Fix for regex folding with case-insensitive pattern https://github.com/elastic/elasticsearch/issues/118371
          */
-        FIXED_REGEX_FOLD;
+        FIXED_REGEX_FOLD,
+
+        /**
+         * Avoid duplicated channels with the same name id when executing ESQL queries.
+         */
+        FIX_REPLACE_MISSING_FIELD_WITH_NULL_DUPLICATE_NAME_ID_IN_LAYOUT;
 
         private final boolean enabled;
 

diff --git a/...g/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/...g/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java
@@ -8,13 +8,15 @@
 package org.elasticsearch.xpack.esql.optimizer.rules.logical.local;
 
 import org.elasticsearch.common.util.Maps;
+import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.xpack.esql.core.expression.Alias;
+import org.elasticsearch.xpack.esql.core.expression.Attribute;
+import org.elasticsearch.xpack.esql.core.expression.AttributeSet;
 import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
 import org.elasticsearch.xpack.esql.core.expression.Literal;
 import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
 import org.elasticsearch.xpack.esql.core.type.DataType;
 import org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext;
-import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
 import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
 import org.elasticsearch.xpack.esql.plan.logical.Eval;
 import org.elasticsearch.xpack.esql.plan.logical.Filter;
@@ -23,13 +25,12 @@
 import org.elasticsearch.xpack.esql.plan.logical.Project;
 import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
 import org.elasticsearch.xpack.esql.plan.logical.TopN;
-import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation;
 import org.elasticsearch.xpack.esql.rule.ParameterizedRule;
-import org.elasticsearch.xpack.esql.stats.SearchStats;
 
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.function.Predicate;
 
 /**
  * Look for any fields used in the plan that are missing locally and replace them with null.
@@ -39,60 +40,84 @@ public class ReplaceMissingFieldWithNull extends ParameterizedRule<LogicalPlan,
 
     @Override
     public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLogicalOptimizerContext) {
-        return plan.transformUp(p -> missingToNull(p, localLogicalOptimizerContext.searchStats()));
-    }
+        // Fields from lookup indices don't need to be present on the node, and our search stats don't include them, anyway. Ignore them.
+        AttributeSet lookupFields = new AttributeSet();
+        plan.forEachUp(EsRelation.class, esRelation -> {
+            // Looking only for indices in LOOKUP mode is correct: during parsing, we assign the expected mode and even if a lookup index
+            // is used in the FROM command, it will not be marked with LOOKUP mode there - but STANDARD.
+            // It seems like we could instead just look for JOINs and walk down their right hand side to find lookup fields - but this does
+            // not work as this rule also gets called just on the right hand side of a JOIN, which means that we don't always know that
+            // we're inside the right (or left) branch of a JOIN node. (See PlannerUtils.localPlan - this looks for FragmentExecs and
+            // performs local logical optimization of the fragments; the right hand side of a LookupJoinExec can be a FragmentExec.)
+            if (esRelation.indexMode() == IndexMode.LOOKUP) {
+                lookupFields.addAll(esRelation.output());
+            }
+        });
 
-    private LogicalPlan missingToNull(LogicalPlan plan, SearchStats stats) {
-        if (plan instanceof EsRelation || plan instanceof LocalRelation) {
-            return plan;
-        }
+        // Do not use the attribute name, this can deviate from the field name for union types; use fieldName() instead.
+        // Also retain fields from lookup indices because we do not have stats for these.
+        Predicate<FieldAttribute> shouldBeRetained = f -> (localLogicalOptimizerContext.searchStats().exists(f.fieldName())
+            || lookupFields.contains(f));
 
-        if (plan instanceof Aggregate a) {
-            // don't do anything (for now)
-            return a;
-        }
-        // keep the aliased name
-        else if (plan instanceof Project project) {
-            var projections = project.projections();
-            List<NamedExpression> newProjections = new ArrayList<>(projections.size());
-            Map<DataType, Alias> nullLiteral = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size());
+        return plan.transformUp(p -> missingToNull(p, shouldBeRetained));
+    }
 
-            for (NamedExpression projection : projections) {
-                // Do not use the attribute name, this can deviate from the field name for union types.
-                if (projection instanceof FieldAttribute f && stats.exists(f.fieldName()) == false) {
+    private LogicalPlan missingToNull(LogicalPlan plan, Predicate<FieldAttribute> shouldBeRetained) {
+        if (plan instanceof EsRelation relation) {
+            // Remove missing fields from the EsRelation because this is not where we will obtain them from; replace them by an Eval right
+            // after, instead. This allows us to safely re-use the attribute ids of the corresponding FieldAttributes.
+            // This means that an EsRelation[field1, field2, field3] where field1 and field 3 are missing will be replaced by
+            // Project[field1, field2, field3] <- keeps the ordering intact
+            // \_Eval[field1 = null, field3 = null]
+            // \_EsRelation[field2]
+            List<Attribute> relationOutput = relation.output();
+            Map<DataType, Alias> nullLiterals = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size());
+            List<NamedExpression> newProjections = new ArrayList<>(relationOutput.size());
+            for (int i = 0, size = relationOutput.size(); i < size; i++) {
+                Attribute attr = relationOutput.get(i);
+                NamedExpression projection;
+                if (attr instanceof FieldAttribute f && (shouldBeRetained.test(f) == false)) {
                     DataType dt = f.dataType();
-                    Alias nullAlias = nullLiteral.get(f.dataType());
+                    Alias nullAlias = nullLiterals.get(dt);
                     // save the first field as null (per datatype)
                     if (nullAlias == null) {
+                        // Keep the same id so downstream query plans don't need updating
+                        // NOTE: THIS IS BRITTLE AND CAN LEAD TO BUGS.
+                        // In case some optimizer rule or so inserts a plan node that requires the field BEFORE the Eval that we're adding
+                        // on top of the EsRelation, this can trigger a field extraction in the physical optimizer phase, causing wrong
+                        // layouts due to a duplicate name id.
+                        // If someone reaches here AGAIN when debugging e.g. ClassCastExceptions NPEs from wrong layouts, we should probably
+                        // give up on this approach and instead insert EvalExecs in InsertFieldExtraction.
                         Alias alias = new Alias(f.source(), f.name(), Literal.of(f, null), f.id());
-                        nullLiteral.put(dt, alias);
+                        nullLiterals.put(dt, alias);
                         projection = alias.toAttribute();
                     }
-                    // otherwise point to it
+                    // otherwise point to it since this avoids creating field copies
                     else {
-                        // since avoids creating field copies
                         projection = new Alias(f.source(), f.name(), nullAlias.toAttribute(), f.id());
                     }
+                } else {
+                    projection = attr;
                 }
-
                 newProjections.add(projection);
             }
-            // add the first found field as null
-            if (nullLiteral.size() > 0) {
-                plan = new Eval(project.source(), project.child(), new ArrayList<>(nullLiteral.values()));
-                plan = new Project(project.source(), plan, newProjections);
+
+            if (nullLiterals.size() == 0) {
+                return plan;
             }
-        } else if (plan instanceof Eval
+
+            Eval eval = new Eval(plan.source(), relation, new ArrayList<>(nullLiterals.values()));
+            // This projection is redundant if there's another projection downstream (and no commands depend on the order until we hit it).
+            return new Project(plan.source(), eval, newProjections);
+        }
+
+        if (plan instanceof Eval
             || plan instanceof Filter
             || plan instanceof OrderBy
             || plan instanceof RegexExtract
             || plan instanceof TopN) {
-                plan = plan.transformExpressionsOnlyUp(
-                    FieldAttribute.class,
-                    // Do not use the attribute name, this can deviate from the field name for union types.
-                    f -> stats.exists(f.fieldName()) ? f : Literal.of(f, null)
-                );
-            }
+            return plan.transformExpressionsOnlyUp(FieldAttribute.class, f -> shouldBeRetained.test(f) ? f : Literal.of(f, null));
+        }
 
         return plan;
     }

diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Layout.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Layout.java
@@ -109,12 +109,11 @@ public Layout build() {
             for (ChannelSet set : channels) {
                 int channel = numberOfChannels++;
                 for (NameId id : set.nameIds) {
+                    // Duplicate name ids would mean that have 2 channels that are declared under the same id. That makes no sense - which
+                    // channel should subsequent operators use, then, when they want to refer to this id?
+                    assert (layout.containsKey(id) == false) : "Duplicate name ids are not allowed in layouts";
                     ChannelAndType next = new ChannelAndType(channel, set.type);
-                    ChannelAndType prev = layout.put(id, next);
-                    // Do allow multiple name to point to the same channel - see https://github.com/elastic/elasticsearch/pull/100238
-                    // if (prev != null) {
-                    // throw new IllegalArgumentException("Name [" + id + "] is on two channels [" + prev + "] and [" + next + "]");
-                    // }
+                    layout.put(id, next);
                 }
             }
             return new DefaultLayout(Collections.unmodifiableMap(layout), numberOfChannels);