10
10
import org .elasticsearch .common .util .Maps ;
11
11
import org .elasticsearch .index .IndexMode ;
12
12
import org .elasticsearch .xpack .esql .core .expression .Alias ;
13
+ import org .elasticsearch .xpack .esql .core .expression .Attribute ;
13
14
import org .elasticsearch .xpack .esql .core .expression .AttributeSet ;
14
15
import org .elasticsearch .xpack .esql .core .expression .FieldAttribute ;
15
16
import org .elasticsearch .xpack .esql .core .expression .Literal ;
16
17
import org .elasticsearch .xpack .esql .core .expression .NamedExpression ;
17
18
import org .elasticsearch .xpack .esql .core .type .DataType ;
18
19
import org .elasticsearch .xpack .esql .core .type .PotentiallyUnmappedKeywordEsField ;
19
20
import org .elasticsearch .xpack .esql .optimizer .LocalLogicalOptimizerContext ;
20
- import org .elasticsearch .xpack .esql .plan .logical .Aggregate ;
21
21
import org .elasticsearch .xpack .esql .plan .logical .EsRelation ;
22
22
import org .elasticsearch .xpack .esql .plan .logical .Eval ;
23
23
import org .elasticsearch .xpack .esql .plan .logical .Filter ;
26
26
import org .elasticsearch .xpack .esql .plan .logical .Project ;
27
27
import org .elasticsearch .xpack .esql .plan .logical .RegexExtract ;
28
28
import org .elasticsearch .xpack .esql .plan .logical .TopN ;
29
- import org .elasticsearch .xpack .esql .plan .logical .join .Join ;
30
- import org .elasticsearch .xpack .esql .plan .logical .local .LocalRelation ;
31
29
import org .elasticsearch .xpack .esql .rule .ParameterizedRule ;
32
- import org .elasticsearch .xpack .esql .stats .SearchStats ;
33
30
34
31
import java .util .ArrayList ;
35
32
import java .util .List ;
36
33
import java .util .Map ;
34
+ import java .util .function .Predicate ;
37
35
38
36
/**
39
37
* Look for any fields used in the plan that are missing locally and replace them with null.
@@ -45,82 +43,83 @@ public class ReplaceMissingFieldWithNull extends ParameterizedRule<LogicalPlan,
45
43
public LogicalPlan apply (LogicalPlan plan , LocalLogicalOptimizerContext localLogicalOptimizerContext ) {
46
44
var lookupFieldsBuilder = AttributeSet .builder ();
47
45
plan .forEachUp (EsRelation .class , esRelation -> {
46
+ // Looking only for indices in LOOKUP mode is correct: during parsing, we assign the expected mode and even if a lookup index
47
+ // is used in the FROM command, it will not be marked with LOOKUP mode there - but STANDARD.
48
+ // It seems like we could instead just look for JOINs and walk down their right hand side to find lookup fields - but this does
49
+ // not work as this rule also gets called just on the right hand side of a JOIN, which means that we don't always know that
50
+ // we're inside the right (or left) branch of a JOIN node. (See PlannerUtils.localPlan - this looks for FragmentExecs and
51
+ // performs local logical optimization of the fragments; the right hand side of a LookupJoinExec can be a FragmentExec.)
48
52
if (esRelation .indexMode () == IndexMode .LOOKUP ) {
49
53
lookupFieldsBuilder .addAll (esRelation .output ());
50
54
}
51
55
});
56
+ AttributeSet lookupFields = lookupFieldsBuilder .build ();
52
57
53
- return plan .transformUp (p -> missingToNull (p , localLogicalOptimizerContext .searchStats (), lookupFieldsBuilder .build ()));
54
- }
55
-
56
- private LogicalPlan missingToNull (LogicalPlan plan , SearchStats stats , AttributeSet lookupFields ) {
57
- if (plan instanceof EsRelation || plan instanceof LocalRelation ) {
58
- return plan ;
59
- }
58
+ // Do not use the attribute name, this can deviate from the field name for union types; use fieldName() instead.
59
+ // Also retain fields from lookup indices because we do not have stats for these.
60
+ Predicate <FieldAttribute > shouldBeRetained = f -> f .field () instanceof PotentiallyUnmappedKeywordEsField
61
+ || (localLogicalOptimizerContext .searchStats ().exists (f .fieldName ()) || lookupFields .contains (f ));
60
62
61
- if (plan instanceof Aggregate a ) {
62
- // don't do anything (for now)
63
- return a ;
64
- }
65
- // keep the aliased name
66
- else if (plan instanceof Project project ) {
67
- var projections = project .projections ();
68
- List <NamedExpression > newProjections = new ArrayList <>(projections .size ());
69
- Map <DataType , Alias > nullLiteral = Maps .newLinkedHashMapWithExpectedSize (DataType .types ().size ());
70
- AttributeSet joinAttributes = joinAttributes (project );
63
+ return plan .transformUp (p -> missingToNull (p , shouldBeRetained ));
64
+ }
71
65
72
- for (NamedExpression projection : projections ) {
73
- // Do not use the attribute name, this can deviate from the field name for union types.
74
- if (projection instanceof FieldAttribute f
75
- && stats .exists (f .fieldName ()) == false
76
- && joinAttributes .contains (f ) == false
77
- && f .field () instanceof PotentiallyUnmappedKeywordEsField == false ) {
78
- // TODO: Should do a searchStats lookup for join attributes instead of just ignoring them here
79
- // See TransportSearchShardsAction
66
+ private LogicalPlan missingToNull (LogicalPlan plan , Predicate <FieldAttribute > shouldBeRetained ) {
67
+ if (plan instanceof EsRelation relation ) {
68
+ // Remove missing fields from the EsRelation because this is not where we will obtain them from; replace them by an Eval right
69
+ // after, instead. This allows us to safely re-use the attribute ids of the corresponding FieldAttributes.
70
+ // This means that an EsRelation[field1, field2, field3] where field1 and field 3 are missing will be replaced by
71
+ // Project[field1, field2, field3] <- keeps the ordering intact
72
+ // \_Eval[field1 = null, field3 = null]
73
+ // \_EsRelation[field2]
74
+ List <Attribute > relationOutput = relation .output ();
75
+ Map <DataType , Alias > nullLiterals = Maps .newLinkedHashMapWithExpectedSize (DataType .types ().size ());
76
+ List <NamedExpression > newProjections = new ArrayList <>(relationOutput .size ());
77
+ for (int i = 0 , size = relationOutput .size (); i < size ; i ++) {
78
+ Attribute attr = relationOutput .get (i );
79
+ NamedExpression projection ;
80
+ if (attr instanceof FieldAttribute f && (shouldBeRetained .test (f ) == false )) {
80
81
DataType dt = f .dataType ();
81
- Alias nullAlias = nullLiteral .get (f . dataType () );
82
+ Alias nullAlias = nullLiterals .get (dt );
82
83
// save the first field as null (per datatype)
83
84
if (nullAlias == null ) {
85
+ // Keep the same id so downstream query plans don't need updating
86
+ // NOTE: THIS IS BRITTLE AND CAN LEAD TO BUGS.
87
+ // In case some optimizer rule or so inserts a plan node that requires the field BEFORE the Eval that we're adding
88
+ // on top of the EsRelation, this can trigger a field extraction in the physical optimizer phase, causing wrong
89
+ // layouts due to a duplicate name id.
90
+ // If someone reaches here AGAIN when debugging e.g. ClassCastExceptions NPEs from wrong layouts, we should probably
91
+ // give up on this approach and instead insert EvalExecs in InsertFieldExtraction.
84
92
Alias alias = new Alias (f .source (), f .name (), Literal .of (f , null ), f .id ());
85
- nullLiteral .put (dt , alias );
93
+ nullLiterals .put (dt , alias );
86
94
projection = alias .toAttribute ();
87
95
}
88
- // otherwise point to it
96
+ // otherwise point to it since this avoids creating field copies
89
97
else {
90
- // since avoids creating field copies
91
98
projection = new Alias (f .source (), f .name (), nullAlias .toAttribute (), f .id ());
92
99
}
100
+ } else {
101
+ projection = attr ;
93
102
}
94
-
95
103
newProjections .add (projection );
96
104
}
97
- // add the first found field as null
98
- if (nullLiteral .size () > 0 ) {
99
- plan = new Eval (project .source (), project .child (), new ArrayList <>(nullLiteral .values ()));
100
- plan = new Project (project .source (), plan , newProjections );
105
+
106
+ if (nullLiterals .size () == 0 ) {
107
+ return plan ;
101
108
}
102
- } else if (plan instanceof Eval
109
+
110
+ Eval eval = new Eval (plan .source (), relation , new ArrayList <>(nullLiterals .values ()));
111
+ // This projection is redundant if there's another projection downstream (and no commands depend on the order until we hit it).
112
+ return new Project (plan .source (), eval , newProjections );
113
+ }
114
+
115
+ if (plan instanceof Eval
103
116
|| plan instanceof Filter
104
117
|| plan instanceof OrderBy
105
118
|| plan instanceof RegexExtract
106
119
|| plan instanceof TopN ) {
107
- plan = plan .transformExpressionsOnlyUp (
108
- FieldAttribute .class ,
109
- // Do not use the attribute name, this can deviate from the field name for union types.
110
- // Also skip fields from lookup indices because we do not have stats for these.
111
- // TODO: We do have stats for lookup indices in case they are being used in the FROM clause; this can be refined.
112
- f -> f .field () instanceof PotentiallyUnmappedKeywordEsField || (stats .exists (f .fieldName ()) || lookupFields .contains (f ))
113
- ? f
114
- : Literal .of (f , null )
115
- );
116
- }
120
+ return plan .transformExpressionsOnlyUp (FieldAttribute .class , f -> shouldBeRetained .test (f ) ? f : Literal .of (f , null ));
121
+ }
117
122
118
123
return plan ;
119
124
}
120
-
121
- private static AttributeSet joinAttributes (Project project ) {
122
- var attributesBuilder = AttributeSet .builder ();
123
- project .forEachDown (Join .class , j -> j .right ().forEachDown (EsRelation .class , p -> attributesBuilder .addAll (p .output ())));
124
- return attributesBuilder .build ();
125
- }
126
125
}
0 commit comments