Skip to content

ES|QL: Improve random query generation tests #121750

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import org.elasticsearch.xpack.esql.qa.rest.generative.GenerativeRestTest;
import org.junit.ClassRule;

@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/102084")
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/121754")
@ThreadLeakFilters(filters = TestClustersThreadFilter.class)
public class GenerativeIT extends GenerativeRestTest {
@ClassRule
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,13 @@ public record Column(String name, String type) {}
public record QueryExecuted(String query, int depth, List<Column> outputSchema, Exception exception) {}

public static String sourceCommand(List<String> availabeIndices) {
return switch (randomIntBetween(0, 2)) {
return switch (randomIntBetween(0, 1)) {
case 0 -> from(availabeIndices);
case 1 -> metaFunctions();
default -> row();
// case 1 -> metaFunctions();
default -> from(availabeIndices);
// TODO re-enable ROW.
// now it crashes nodes in some cases: exiting java.lang.AssertionError: estimated row size [0] wasn't set
// default -> row();
};

}
Expand All @@ -41,8 +44,12 @@ public static String sourceCommand(List<String> availabeIndices) {
* @param policies
* @return a new command that can process it as input
*/
public static String pipeCommand(List<Column> previousOutput, List<CsvTestsDataLoader.EnrichConfig> policies) {
return switch (randomIntBetween(0, 11)) {
public static String pipeCommand(
List<Column> previousOutput,
List<CsvTestsDataLoader.EnrichConfig> policies,
List<GenerativeRestTest.LookupIdx> lookupIndices
) {
return switch (randomIntBetween(0, 12)) {
case 0 -> dissect(previousOutput);
case 1 -> drop(previousOutput);
case 2 -> enrich(previousOutput, policies);
Expand All @@ -54,10 +61,26 @@ public static String pipeCommand(List<Column> previousOutput, List<CsvTestsDataL
case 8 -> rename(previousOutput);
case 9 -> sort(previousOutput);
case 10 -> stats(previousOutput);
case 11 -> join(previousOutput, lookupIndices);
default -> where(previousOutput);
};
}

private static String join(List<Column> previousOutput, List<GenerativeRestTest.LookupIdx> lookupIndices) {

GenerativeRestTest.LookupIdx lookupIdx = randomFrom(lookupIndices);
String lookupIdxName = lookupIdx.idxName();
String idxKey = lookupIdx.key();
String keyType = lookupIdx.keyType();

var candidateKeys = previousOutput.stream().filter(x -> x.type.equals(keyType)).toList();
if (candidateKeys.isEmpty()) {
return "";
}
Column key = randomFrom(candidateKeys);
return "| rename " + key.name + " as " + idxKey + " | lookup join " + lookupIdxName + " on " + idxKey;
}

private static String where(List<Column> previousOutput) {
// TODO more complex conditions
StringBuilder result = new StringBuilder(" | where ");
Expand Down Expand Up @@ -191,15 +214,66 @@ private static String keep(List<Column> previousOutput) {
}

private static String randomName(List<Column> previousOutput) {
return previousOutput.get(randomIntBetween(0, previousOutput.size() - 1)).name();
// we need to exclude <all-fields-projected>
// https://github.com/elastic/elasticsearch/issues/121741
return randomFrom(previousOutput.stream().filter(x -> x.name().equals("<all-fields-projected>") == false).toList()).name();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extract "" into a constant

}

private static String randomGroupableName(List<Column> previousOutput) {
// we need to exclude <all-fields-projected>
// https://github.com/elastic/elasticsearch/issues/121741
var candidates = previousOutput.stream()
.filter(EsqlQueryGenerator::groupable)
.filter(x -> x.name().equals("<all-fields-projected>") == false)
.toList();
if (candidates.isEmpty()) {
return null;
}
return randomFrom(candidates).name();
}

private static boolean groupable(Column col) {
return col.type.equals("keyword")
|| col.type.equals("text")
|| col.type.equals("long")
|| col.type.equals("integer")
|| col.type.equals("ip")
|| col.type.equals("version");
Comment on lines +236 to +241
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

put those types into a set and simply do a map.contains()

}

private static String randomSortableName(List<Column> previousOutput) {
// we need to exclude <all-fields-projected>
// https://github.com/elastic/elasticsearch/issues/121741
var candidates = previousOutput.stream()
.filter(EsqlQueryGenerator::sortable)
.filter(x -> x.name().equals("<all-fields-projected>") == false)
.toList();
if (candidates.isEmpty()) {
return null;
}
return randomFrom(candidates).name();
}

private static boolean sortable(Column col) {
return col.type.equals("keyword")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment as groupable

|| col.type.equals("text")
|| col.type.equals("long")
|| col.type.equals("integer")
|| col.type.equals("ip")
|| col.type.equals("version");
}

private static String rename(List<Column> previousOutput) {
int n = randomIntBetween(1, Math.min(3, previousOutput.size()));
List<String> proj = new ArrayList<>();
List<String> names = new ArrayList<>(previousOutput.stream().map(Column::name).collect(Collectors.toList()));
for (int i = 0; i < n; i++) {
String name = names.remove(randomIntBetween(0, names.size() - 1));
var colN = randomIntBetween(0, names.size() - 1);
if (previousOutput.get(colN).type().endsWith("_range")) {
// ranges are not fully supported yet
continue;
}
String name = names.remove(colN);
String newName;
if (names.isEmpty() || randomBoolean()) {
newName = randomAlphaOfLength(5);
Expand All @@ -209,6 +283,9 @@ private static String rename(List<Column> previousOutput) {
names.add(newName);
proj.add(name + " AS " + newName);
}
if (proj.isEmpty()) {
return "";
}
return " | rename " + proj.stream().collect(Collectors.joining(", "));
}

Expand All @@ -227,7 +304,7 @@ private static String drop(List<Column> previousOutput) {
name = "*" + name.substring(randomIntBetween(1, name.length() - 1));
}
}
proj.add(name);
proj.add(name.contains("*") ? name : "`" + name + "`");
}
return " | drop " + proj.stream().collect(Collectors.joining(", "));
}
Expand All @@ -236,7 +313,11 @@ private static String sort(List<Column> previousOutput) {
int n = randomIntBetween(1, previousOutput.size());
Set<String> proj = new HashSet<>();
for (int i = 0; i < n; i++) {
proj.add(randomName(previousOutput));
String col = randomSortableName(previousOutput);
if (col == null) {
return "";// no sortable columns
}
proj.add(col);
}
return " | sort "
+ proj.stream()
Expand Down Expand Up @@ -295,9 +376,10 @@ private static String stats(List<Column> previousOutput) {
cmd.append(expression);
}
if (randomBoolean()) {
cmd.append(" by ");

cmd.append(randomName(nonNull));
var col = randomGroupableName(nonNull);
if (col != null) {
cmd.append(" by " + col);
}
}
return cmd.toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,18 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
public static final Set<String> ALLOWED_ERRORS = Set.of(
"Reference \\[.*\\] is ambiguous",
"Cannot use field \\[.*\\] due to ambiguities",
"cannot sort on .*"
"cannot sort on .*",
"argument of \\[count_distinct\\(.*\\)\\] must",
"Cannot use field \\[.*\\] with unsupported type \\[.*_range\\]",
// warnings
"Field '.*' shadowed by field at line .*",
"evaluation of \\[.*\\] failed, treating result as null", // TODO investigate?
// Awaiting fixes
"estimated row size \\[0\\] wasn't set", // https://github.com/elastic/elasticsearch/issues/121739
"unknown physical plan node \\[OrderExec\\]", // https://github.com/elastic/elasticsearch/issues/120817
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741
//
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
);

public static final Set<Pattern> ALLOWED_ERROR_PATTERNS = ALLOWED_ERRORS.stream()
Expand Down Expand Up @@ -64,6 +75,7 @@ public static void wipeTestData() throws IOException {

public void test() {
List<String> indices = availableIndices();
List<LookupIdx> lookupIndices = lookupIndices();
List<CsvTestsDataLoader.EnrichConfig> policies = availableEnrichPolicies();
for (int i = 0; i < ITERATIONS; i++) {
String command = EsqlQueryGenerator.sourceCommand(indices);
Expand All @@ -76,7 +88,7 @@ public void test() {
if (result.outputSchema().isEmpty()) {
break;
}
command = EsqlQueryGenerator.pipeCommand(result.outputSchema(), policies);
command = EsqlQueryGenerator.pipeCommand(result.outputSchema(), policies, lookupIndices);
result = execute(result.query() + command, result.depth() + 1);
if (result.exception() != null) {
checkException(result);
Expand All @@ -102,6 +114,9 @@ private EsqlQueryGenerator.QueryExecuted execute(String command, int depth) {
return new EsqlQueryGenerator.QueryExecuted(command, depth, outputSchema, null);
} catch (Exception e) {
return new EsqlQueryGenerator.QueryExecuted(command, depth, null, e);
} catch (AssertionError ae) {
// this is for ensureNoWarnings()
return new EsqlQueryGenerator.QueryExecuted(command, depth, null, new RuntimeException(ae.getMessage()));
}

}
Expand All @@ -116,7 +131,23 @@ private List<EsqlQueryGenerator.Column> outputSchema(Map<String, Object> a) {
}

private List<String> availableIndices() {
return new ArrayList<>(CSV_DATASET_MAP.keySet());
return new ArrayList<>(
CSV_DATASET_MAP.entrySet()
.stream()
.filter(x -> x.getValue().requiresInferenceEndpoint() == false)
.map(Map.Entry::getKey)
.toList()
);
}

record LookupIdx(String idxName, String key, String keyType) {}

private List<LookupIdx> lookupIndices() {
List<LookupIdx> result = new ArrayList<>();
// we don't have key info from the dataset loader, let's hardcode it for now
result.add(new LookupIdx("languages_lookup", "language_code", "integer"));
result.add(new LookupIdx("message_types_lookup", "message", "keyword"));
return result;
}

List<CsvTestsDataLoader.EnrichConfig> availableEnrichPolicies() {
Expand Down