Skip to content

Commit c934908

Browse files
authored
[8.19] Bug fix, last optimised value in ESUTF8StreamJsonParser kept old value. (elastic#135184) (elastic#135622)
1 parent a740cb8 commit c934908

File tree

5 files changed

+147
-15
lines changed

5 files changed

+147
-15
lines changed

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@
2424
import java.util.ArrayList;
2525
import java.util.List;
2626

27+
/**
28+
* Provides the method getValueAsText that is a best-effort optimization for UTF8 fields. If the
29+
* {@link UTF8StreamJsonParser} has already parsed the text, then the caller should fall back to getText.
30+
* This is sufficient because when we call getText, jackson stores the parsed UTF-16 value for future calls.
31+
* Once we've already got the parsed UTF-16 value, the optimization isn't necessary.
32+
*/
2733
public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser {
2834
protected int stringEnd = -1;
2935
protected int stringLength;
@@ -51,6 +57,7 @@ public ESUTF8StreamJsonParser(
5157
* This is only a best-effort attempt; if there is some reason the bytes cannot be retrieved, this method will return null.
5258
*/
5359
public Text getValueAsText() throws IOException {
60+
// _tokenIncomplete is true when UTF8StreamJsonParser has already processed this value.
5461
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) {
5562
if (lastOptimisedValue != null) {
5663
return new Text(new XContentString.UTF8Bytes(lastOptimisedValue), stringLength);
@@ -146,33 +153,33 @@ protected Text _finishAndReturnText() throws IOException {
146153

147154
@Override
148155
public JsonToken nextToken() throws IOException {
149-
maybeResetCurrentTokenState();
150-
stringEnd = -1;
156+
resetCurrentTokenState();
151157
return super.nextToken();
152158
}
153159

154160
@Override
155161
public boolean nextFieldName(SerializableString str) throws IOException {
156-
maybeResetCurrentTokenState();
157-
stringEnd = -1;
162+
resetCurrentTokenState();
158163
return super.nextFieldName(str);
159164
}
160165

161166
@Override
162167
public String nextFieldName() throws IOException {
163-
maybeResetCurrentTokenState();
164-
stringEnd = -1;
168+
resetCurrentTokenState();
165169
return super.nextFieldName();
166170
}
167171

168172
/**
169-
* Resets the current token state before moving to the next.
173+
* Resets the current token state before moving to the next. It resets the _inputPtr and the
174+
* _tokenIncomplete only if {@link UTF8StreamJsonParser#getText()} or {@link UTF8StreamJsonParser#getValueAsString()}
175+
* hasn't run yet.
170176
*/
171-
private void maybeResetCurrentTokenState() {
177+
private void resetCurrentTokenState() {
172178
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) {
173179
_inputPtr = stringEnd;
174180
_tokenIncomplete = false;
175-
lastOptimisedValue = null;
176181
}
182+
lastOptimisedValue = null;
183+
stringEnd = -1;
177184
}
178185
}

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import com.fasterxml.jackson.core.JsonToken;
1717
import com.fasterxml.jackson.core.exc.InputCoercionException;
1818
import com.fasterxml.jackson.core.exc.StreamConstraintsException;
19+
import com.fasterxml.jackson.core.filter.FilteringParserDelegate;
1920
import com.fasterxml.jackson.core.io.JsonEOFException;
2021

2122
import org.elasticsearch.core.IOUtils;
@@ -146,7 +147,19 @@ public String text() throws IOException {
146147

147148
@Override
148149
public XContentString optimizedText() throws IOException {
149-
// TODO: enable utf-8 parsing optimization once verified it is completely safe
150+
if (currentToken().isValue() == false) {
151+
throwOnNoText();
152+
}
153+
var parser = this.parser;
154+
if (parser instanceof FilteringParserDelegate delegate) {
155+
parser = delegate.delegate();
156+
}
157+
if (parser instanceof ESUTF8StreamJsonParser esParser) {
158+
var bytesRef = esParser.getValueAsText();
159+
if (bytesRef != null) {
160+
return bytesRef;
161+
}
162+
}
150163
return new Text(text());
151164
}
152165

libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java

Lines changed: 109 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,23 @@
1313
import com.fasterxml.jackson.core.JsonParser;
1414
import com.fasterxml.jackson.core.JsonToken;
1515

16+
import org.elasticsearch.common.bytes.BytesArray;
17+
import org.elasticsearch.common.xcontent.XContentHelper;
1618
import org.elasticsearch.core.CheckedConsumer;
1719
import org.elasticsearch.test.ESTestCase;
20+
import org.elasticsearch.xcontent.FilterXContentParserWrapper;
21+
import org.elasticsearch.xcontent.XContentParser;
22+
import org.elasticsearch.xcontent.XContentParserConfiguration;
1823
import org.elasticsearch.xcontent.XContentString;
24+
import org.elasticsearch.xcontent.XContentType;
25+
import org.elasticsearch.xcontent.provider.XContentParserConfigurationImpl;
1926
import org.hamcrest.Matchers;
2027

2128
import java.io.IOException;
2229
import java.nio.charset.StandardCharsets;
2330
import java.util.Locale;
31+
import java.util.stream.Collectors;
32+
import java.util.stream.IntStream;
2433

2534
public class ESUTF8StreamJsonParserTests extends ESTestCase {
2635

@@ -272,12 +281,17 @@ public void testGetValueRandomized() throws IOException {
272281
var text = parser.getValueAsText();
273282
assertTextRef(text.bytes(), currVal);
274283
assertThat(text.stringLength(), Matchers.equalTo(currVal.length()));
275-
276-
// Retrieve it twice to ensure it works as expected
284+
// Retrieve it a second time
277285
text = parser.getValueAsText();
286+
assertThat(text, Matchers.notNullValue());
278287
assertTextRef(text.bytes(), currVal);
279288
assertThat(text.stringLength(), Matchers.equalTo(currVal.length()));
289+
// Use getText()
290+
assertThat(parser.getText(), Matchers.equalTo(text.string()));
291+
// After retrieving it with getText() we do not use the optimised value anymore.
292+
assertThat(parser.getValueAsText(), Matchers.nullValue());
280293
} else {
294+
assertThat(parser.getText(), Matchers.notNullValue());
281295
assertThat(parser.getValueAsText(), Matchers.nullValue());
282296
assertThat(parser.getValueAsString(), Matchers.equalTo(currVal));
283297
// Retrieve it twice to ensure it works as expected
@@ -288,4 +302,97 @@ public void testGetValueRandomized() throws IOException {
288302
});
289303
}
290304

305+
/**
306+
* This test compares the retrieval of an optimised text against the baseline.
307+
*/
308+
public void testOptimisedParser() throws Exception {
309+
for (int i = 0; i < 200; i++) {
310+
String json = randomJsonInput(randomIntBetween(1, 6));
311+
try (
312+
var baselineParser = XContentHelper.createParser(
313+
XContentParserConfiguration.EMPTY,
314+
new BytesArray(json),
315+
XContentType.JSON
316+
);
317+
var optimisedParser = TestXContentParser.create(json)
318+
) {
319+
var expected = baselineParser.mapOrdered();
320+
var actual = optimisedParser.mapOrdered();
321+
assertThat(expected, Matchers.equalTo(actual));
322+
}
323+
}
324+
}
325+
326+
private String randomJsonInput(int depth) {
327+
StringBuilder sb = new StringBuilder();
328+
sb.append('{');
329+
int numberOfFields = randomIntBetween(1, 10);
330+
for (int i = 0; i < numberOfFields; i++) {
331+
sb.append("\"k-").append(randomAlphanumericOfLength(10)).append("\":");
332+
if (depth == 0 || randomBoolean()) {
333+
if (randomIntBetween(0, 9) == 0) {
334+
sb.append(
335+
IntStream.range(0, randomIntBetween(1, 10))
336+
.mapToObj(ignored -> randomUTF8Value())
337+
.collect(Collectors.joining(",", "[", "]"))
338+
);
339+
} else {
340+
sb.append(randomUTF8Value());
341+
}
342+
} else {
343+
sb.append(randomJsonInput(depth - 1));
344+
}
345+
if (i < numberOfFields - 1) {
346+
sb.append(',');
347+
}
348+
}
349+
sb.append("}");
350+
return sb.toString();
351+
}
352+
353+
private String randomUTF8Value() {
354+
return "\"" + buildRandomInput(randomIntBetween(10, 50)).input + "\"";
355+
}
356+
357+
/**
358+
* This XContentParser introduces a random mix of getText() and getOptimisedText()
359+
* to simulate different access patterns for optimised fields.
360+
*/
361+
private static class TestXContentParser extends FilterXContentParserWrapper {
362+
363+
TestXContentParser(XContentParser delegate) {
364+
super(delegate);
365+
}
366+
367+
static TestXContentParser create(String input) throws IOException {
368+
JsonFactory factory = new ESJsonFactoryBuilder().build();
369+
assertThat(factory, Matchers.instanceOf(ESJsonFactory.class));
370+
371+
JsonParser parser = factory.createParser(StandardCharsets.UTF_8.encode(input).array());
372+
assertThat(parser, Matchers.instanceOf(ESUTF8StreamJsonParser.class));
373+
return new TestXContentParser(new JsonXContentParser(XContentParserConfigurationImpl.EMPTY, parser));
374+
}
375+
376+
@Override
377+
public String text() throws IOException {
378+
if (randomIntBetween(0, 9) < 8) {
379+
return super.text();
380+
} else {
381+
return super.optimizedText().string();
382+
}
383+
}
384+
385+
@Override
386+
public XContentString optimizedText() throws IOException {
387+
int extraCalls = randomIntBetween(0, 5);
388+
for (int i = 0; i < extraCalls; i++) {
389+
if (randomBoolean()) {
390+
super.optimizedText();
391+
} else {
392+
super.text();
393+
}
394+
}
395+
return super.optimizedText();
396+
}
397+
}
291398
}

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Keyword with escaped characters as multi-field:
33
- requires:
44
cluster_features: [ "mapper.multi_field.unicode_optimisation_fix" ]
5-
reason: "requires a fix (#134770)"
5+
reason: "Captures the scenarios in #134770 & 135256"
66
- do:
77
indices.create:
88
index: test
@@ -14,14 +14,20 @@ Keyword with escaped characters as multi-field:
1414
fields:
1515
bar:
1616
type: keyword
17+
bar-text:
18+
type: text
19+
my-ip:
20+
type: ip
1721

22+
# Ensure the IP is correctly parsed after a multi-field mapping that combines optimised and non-optimised fields
1823
- do:
1924
index:
2025
index: test
2126
id: "1"
2227
refresh: true
2328
body:
2429
foo: "c:\\windows\\system32\\svchost.exe"
30+
my-ip: "127.0.0.1"
2531

2632
- do:
2733
search:

server/src/test/java/org/elasticsearch/common/xcontent/json/JsonXContentTests.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,7 @@ public void testOptimizedTextHasBytes() throws Exception {
5959
assertSame(XContentParser.Token.FIELD_NAME, parser.nextToken());
6060
assertTrue(parser.nextToken().isValue());
6161
Text text = (Text) parser.optimizedText();
62-
// TODO: uncomment after utf8 optimized parsing has been enabled again:
63-
// assertTrue(text.hasBytes());
62+
assertTrue(text.hasBytes());
6463
assertThat(text.string(), equalTo("foo"));
6564
}
6665
}

0 commit comments

Comments
 (0)