Skip to content

Commit c594067

Browse files
ES|QL: Improve random query generation tests (#121750)
1 parent 2c6dd6c commit c594067

File tree

3 files changed

+129
-16
lines changed

3 files changed

+129
-16
lines changed

x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/GenerativeIT.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import org.elasticsearch.xpack.esql.qa.rest.generative.GenerativeRestTest;
1616
import org.junit.ClassRule;
1717

18-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/102084")
18+
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/121754")
1919
@ThreadLeakFilters(filters = TestClustersThreadFilter.class)
2020
public class GenerativeIT extends GenerativeRestTest {
2121
@ClassRule

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/EsqlQueryGenerator.java

+94-12
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,13 @@ public record Column(String name, String type) {}
2828
public record QueryExecuted(String query, int depth, List<Column> outputSchema, Exception exception) {}
2929

3030
public static String sourceCommand(List<String> availabeIndices) {
31-
return switch (randomIntBetween(0, 2)) {
31+
return switch (randomIntBetween(0, 1)) {
3232
case 0 -> from(availabeIndices);
33-
case 1 -> metaFunctions();
34-
default -> row();
33+
// case 1 -> metaFunctions();
34+
default -> from(availabeIndices);
35+
// TODO re-enable ROW.
36+
// now it crashes nodes in some cases: exiting java.lang.AssertionError: estimated row size [0] wasn't set
37+
// default -> row();
3538
};
3639

3740
}
@@ -41,8 +44,12 @@ public static String sourceCommand(List<String> availabeIndices) {
4144
* @param policies
4245
* @return a new command that can process it as input
4346
*/
44-
public static String pipeCommand(List<Column> previousOutput, List<CsvTestsDataLoader.EnrichConfig> policies) {
45-
return switch (randomIntBetween(0, 11)) {
47+
public static String pipeCommand(
48+
List<Column> previousOutput,
49+
List<CsvTestsDataLoader.EnrichConfig> policies,
50+
List<GenerativeRestTest.LookupIdx> lookupIndices
51+
) {
52+
return switch (randomIntBetween(0, 12)) {
4653
case 0 -> dissect(previousOutput);
4754
case 1 -> drop(previousOutput);
4855
case 2 -> enrich(previousOutput, policies);
@@ -54,10 +61,26 @@ public static String pipeCommand(List<Column> previousOutput, List<CsvTestsDataL
5461
case 8 -> rename(previousOutput);
5562
case 9 -> sort(previousOutput);
5663
case 10 -> stats(previousOutput);
64+
case 11 -> join(previousOutput, lookupIndices);
5765
default -> where(previousOutput);
5866
};
5967
}
6068

69+
private static String join(List<Column> previousOutput, List<GenerativeRestTest.LookupIdx> lookupIndices) {
70+
71+
GenerativeRestTest.LookupIdx lookupIdx = randomFrom(lookupIndices);
72+
String lookupIdxName = lookupIdx.idxName();
73+
String idxKey = lookupIdx.key();
74+
String keyType = lookupIdx.keyType();
75+
76+
var candidateKeys = previousOutput.stream().filter(x -> x.type.equals(keyType)).toList();
77+
if (candidateKeys.isEmpty()) {
78+
return "";
79+
}
80+
Column key = randomFrom(candidateKeys);
81+
return "| rename " + key.name + " as " + idxKey + " | lookup join " + lookupIdxName + " on " + idxKey;
82+
}
83+
6184
private static String where(List<Column> previousOutput) {
6285
// TODO more complex conditions
6386
StringBuilder result = new StringBuilder(" | where ");
@@ -191,15 +214,66 @@ private static String keep(List<Column> previousOutput) {
191214
}
192215

193216
private static String randomName(List<Column> previousOutput) {
194-
return previousOutput.get(randomIntBetween(0, previousOutput.size() - 1)).name();
217+
// we need to exclude <all-fields-projected>
218+
// https://github.com/elastic/elasticsearch/issues/121741
219+
return randomFrom(previousOutput.stream().filter(x -> x.name().equals("<all-fields-projected>") == false).toList()).name();
220+
}
221+
222+
private static String randomGroupableName(List<Column> previousOutput) {
223+
// we need to exclude <all-fields-projected>
224+
// https://github.com/elastic/elasticsearch/issues/121741
225+
var candidates = previousOutput.stream()
226+
.filter(EsqlQueryGenerator::groupable)
227+
.filter(x -> x.name().equals("<all-fields-projected>") == false)
228+
.toList();
229+
if (candidates.isEmpty()) {
230+
return null;
231+
}
232+
return randomFrom(candidates).name();
233+
}
234+
235+
private static boolean groupable(Column col) {
236+
return col.type.equals("keyword")
237+
|| col.type.equals("text")
238+
|| col.type.equals("long")
239+
|| col.type.equals("integer")
240+
|| col.type.equals("ip")
241+
|| col.type.equals("version");
242+
}
243+
244+
private static String randomSortableName(List<Column> previousOutput) {
245+
// we need to exclude <all-fields-projected>
246+
// https://github.com/elastic/elasticsearch/issues/121741
247+
var candidates = previousOutput.stream()
248+
.filter(EsqlQueryGenerator::sortable)
249+
.filter(x -> x.name().equals("<all-fields-projected>") == false)
250+
.toList();
251+
if (candidates.isEmpty()) {
252+
return null;
253+
}
254+
return randomFrom(candidates).name();
255+
}
256+
257+
private static boolean sortable(Column col) {
258+
return col.type.equals("keyword")
259+
|| col.type.equals("text")
260+
|| col.type.equals("long")
261+
|| col.type.equals("integer")
262+
|| col.type.equals("ip")
263+
|| col.type.equals("version");
195264
}
196265

197266
private static String rename(List<Column> previousOutput) {
198267
int n = randomIntBetween(1, Math.min(3, previousOutput.size()));
199268
List<String> proj = new ArrayList<>();
200269
List<String> names = new ArrayList<>(previousOutput.stream().map(Column::name).collect(Collectors.toList()));
201270
for (int i = 0; i < n; i++) {
202-
String name = names.remove(randomIntBetween(0, names.size() - 1));
271+
var colN = randomIntBetween(0, names.size() - 1);
272+
if (previousOutput.get(colN).type().endsWith("_range")) {
273+
// ranges are not fully supported yet
274+
continue;
275+
}
276+
String name = names.remove(colN);
203277
String newName;
204278
if (names.isEmpty() || randomBoolean()) {
205279
newName = randomAlphaOfLength(5);
@@ -209,6 +283,9 @@ private static String rename(List<Column> previousOutput) {
209283
names.add(newName);
210284
proj.add(name + " AS " + newName);
211285
}
286+
if (proj.isEmpty()) {
287+
return "";
288+
}
212289
return " | rename " + proj.stream().collect(Collectors.joining(", "));
213290
}
214291

@@ -227,7 +304,7 @@ private static String drop(List<Column> previousOutput) {
227304
name = "*" + name.substring(randomIntBetween(1, name.length() - 1));
228305
}
229306
}
230-
proj.add(name);
307+
proj.add(name.contains("*") ? name : "`" + name + "`");
231308
}
232309
return " | drop " + proj.stream().collect(Collectors.joining(", "));
233310
}
@@ -236,7 +313,11 @@ private static String sort(List<Column> previousOutput) {
236313
int n = randomIntBetween(1, previousOutput.size());
237314
Set<String> proj = new HashSet<>();
238315
for (int i = 0; i < n; i++) {
239-
proj.add(randomName(previousOutput));
316+
String col = randomSortableName(previousOutput);
317+
if (col == null) {
318+
return "";// no sortable columns
319+
}
320+
proj.add(col);
240321
}
241322
return " | sort "
242323
+ proj.stream()
@@ -295,9 +376,10 @@ private static String stats(List<Column> previousOutput) {
295376
cmd.append(expression);
296377
}
297378
if (randomBoolean()) {
298-
cmd.append(" by ");
299-
300-
cmd.append(randomName(nonNull));
379+
var col = randomGroupableName(nonNull);
380+
if (col != null) {
381+
cmd.append(" by " + col);
382+
}
301383
}
302384
return cmd.toString();
303385
}

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java

+34-3
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,18 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
3535
public static final Set<String> ALLOWED_ERRORS = Set.of(
3636
"Reference \\[.*\\] is ambiguous",
3737
"Cannot use field \\[.*\\] due to ambiguities",
38-
"cannot sort on .*"
38+
"cannot sort on .*",
39+
"argument of \\[count_distinct\\(.*\\)\\] must",
40+
"Cannot use field \\[.*\\] with unsupported type \\[.*_range\\]",
41+
// warnings
42+
"Field '.*' shadowed by field at line .*",
43+
"evaluation of \\[.*\\] failed, treating result as null", // TODO investigate?
44+
// Awaiting fixes
45+
"estimated row size \\[0\\] wasn't set", // https://github.com/elastic/elasticsearch/issues/121739
46+
"unknown physical plan node \\[OrderExec\\]", // https://github.com/elastic/elasticsearch/issues/120817
47+
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741
48+
//
49+
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
3950
);
4051

4152
public static final Set<Pattern> ALLOWED_ERROR_PATTERNS = ALLOWED_ERRORS.stream()
@@ -64,6 +75,7 @@ public static void wipeTestData() throws IOException {
6475

6576
public void test() {
6677
List<String> indices = availableIndices();
78+
List<LookupIdx> lookupIndices = lookupIndices();
6779
List<CsvTestsDataLoader.EnrichConfig> policies = availableEnrichPolicies();
6880
for (int i = 0; i < ITERATIONS; i++) {
6981
String command = EsqlQueryGenerator.sourceCommand(indices);
@@ -76,7 +88,7 @@ public void test() {
7688
if (result.outputSchema().isEmpty()) {
7789
break;
7890
}
79-
command = EsqlQueryGenerator.pipeCommand(result.outputSchema(), policies);
91+
command = EsqlQueryGenerator.pipeCommand(result.outputSchema(), policies, lookupIndices);
8092
result = execute(result.query() + command, result.depth() + 1);
8193
if (result.exception() != null) {
8294
checkException(result);
@@ -102,6 +114,9 @@ private EsqlQueryGenerator.QueryExecuted execute(String command, int depth) {
102114
return new EsqlQueryGenerator.QueryExecuted(command, depth, outputSchema, null);
103115
} catch (Exception e) {
104116
return new EsqlQueryGenerator.QueryExecuted(command, depth, null, e);
117+
} catch (AssertionError ae) {
118+
// this is for ensureNoWarnings()
119+
return new EsqlQueryGenerator.QueryExecuted(command, depth, null, new RuntimeException(ae.getMessage()));
105120
}
106121

107122
}
@@ -116,7 +131,23 @@ private List<EsqlQueryGenerator.Column> outputSchema(Map<String, Object> a) {
116131
}
117132

118133
private List<String> availableIndices() {
119-
return new ArrayList<>(CSV_DATASET_MAP.keySet());
134+
return new ArrayList<>(
135+
CSV_DATASET_MAP.entrySet()
136+
.stream()
137+
.filter(x -> x.getValue().requiresInferenceEndpoint() == false)
138+
.map(Map.Entry::getKey)
139+
.toList()
140+
);
141+
}
142+
143+
/**
 * Describes a lookup index that generated queries can join against.
 *
 * @param idxName name of the lookup index
 * @param key     name of the key field of the lookup index, used as the join field
 * @param keyType type of the key field, compared against the column types of a query's output schema
 */
record LookupIdx(String idxName, String key, String keyType) {}
144+
145+
private List<LookupIdx> lookupIndices() {
146+
List<LookupIdx> result = new ArrayList<>();
147+
// we don't have key info from the dataset loader, let's hardcode it for now
148+
result.add(new LookupIdx("languages_lookup", "language_code", "integer"));
149+
result.add(new LookupIdx("message_types_lookup", "message", "keyword"));
150+
return result;
120151
}
121152

122153
List<CsvTestsDataLoader.EnrichConfig> availableEnrichPolicies() {

0 commit comments

Comments
 (0)