Skip to content

Commit 364465e

Browse files
committed
Improves performance when stream copy optimization is enabled by not requiring symbol table copies.
1 parent d4e92dc commit 364465e

7 files changed

+224
-10
lines changed

src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.nio.charset.StandardCharsets;
2424
import java.util.ArrayList;
2525
import java.util.Arrays;
26+
import java.util.Collection;
2627
import java.util.HashMap;
2728
import java.util.Iterator;
2829
import java.util.List;
@@ -36,6 +37,7 @@
3637
import static com.amazon.ion.SystemSymbols.NAME_SID;
3738
import static com.amazon.ion.SystemSymbols.SYMBOLS_SID;
3839
import static com.amazon.ion.SystemSymbols.VERSION_SID;
40+
import static com.amazon.ion.impl._Private_Utils.safeEquals;
3941

4042
/**
4143
* An IonCursor capable of application-level parsing of binary Ion streams.
@@ -82,6 +84,12 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina
8284
// symbol table is encountered in the stream.
8385
private SymbolTable cachedReadOnlySymbolTable = null;
8486

87+
// The cached SymbolTable that was determined to be a superset of the reader's current symbol table during a call
88+
// to 'isSymbolTableSubsetOf'. This is set to null whenever the reader encounters a new symbol table. Therefore,
89+
// when non-null, determining whether the reader's symbol table is a subset of a given table is as simple as
90+
// checking whether that table is the same as 'lastSupersetSymbolTable'.
91+
private SymbolTable lastSupersetSymbolTable = null;
92+
8593
// The reusable annotation iterator.
8694
private final AnnotationSequenceIterator annotationIterator = new AnnotationSequenceIterator();
8795

@@ -206,6 +214,110 @@ private SymbolTable getSystemSymbolTable() {
206214
return SharedSymbolTable.getSystemSymbolTable(getIonMajorVersion());
207215
}
208216

217+
boolean compareSymbolTableImportsArrayToList(SymbolTable[] arr, int arrayLength, List<SymbolTable> list) {
218+
// Note: the array variant must begin with a system symbol table, while the list variant must not.
219+
if (arrayLength - 1 != list.size()) {
220+
return false;
221+
}
222+
for (int i = 1; i < arrayLength; i++) {
223+
// TODO amazon-ion/ion-java/issues/18 Currently, we check imports by their references, which
224+
// is overly strict; imports that have different references but the same symbols should pass the check.
225+
// However, this is a cheaper check and is compatible with how common Catalog implementations handle
226+
// shared tables.
227+
if (list.get(i - 1) != arr[i]) {
228+
return false;
229+
}
230+
}
231+
return true;
232+
}
233+
234+
boolean compareSymbolsArrayToCollection(String[] arr, int arrayLength, Collection<String> collection) {
235+
// Precondition: the collection contains at least as many elements as the array.
236+
Iterator<String> collectionIterator = collection.iterator();
237+
for (int i = 0; i < arrayLength; i++) {
238+
if (!safeEquals(arr[i], collectionIterator.next())) {
239+
return false;
240+
}
241+
}
242+
return true;
243+
}
244+
245+
/**
246+
* Determines whether the symbol table active at the reader's current position is a subset of another symbol table,
247+
* meaning that every symbol in the reader's symbol table is present and has the same symbol ID in the other
248+
* table.
249+
* @param other another symbol table.
250+
* @return true if the reader's symbol table is a subset of the other table; otherwise, false.
251+
*/
252+
boolean isSymbolTableSubsetOf(SymbolTable other)
253+
{
254+
if (lastSupersetSymbolTable != null) {
255+
// lastSupersetSymbolTable is reset when the reader's symbol table changes, so we know the reader's symbol
256+
// table is the same as it was when last compared. This is an optimization that avoids the more expensive
257+
// comparisons when this method is called repetitively within the same symbol table contexts. This
258+
// commonly happens during repetitive calls to IonWriter.writeValue(IonReader), which is used directly
259+
// by users and by the looping wrapper IonWriter.writeValues(IonReader).
260+
return other == lastSupersetSymbolTable && other.getMaxId() == lastSupersetSymbolTable.getMaxId();
261+
}
262+
263+
int numberOfLocalSymbols = localSymbolMaxOffset + 1;
264+
int maxId = imports.getMaxId() + numberOfLocalSymbols;
265+
266+
// Note: the first imported table is always the system symbol table.
267+
boolean isSystemSymbolTable = numberOfLocalSymbols == 0 && imports.getImportedTablesNoCopy().length == 1;
268+
boolean otherHasPrivateAttributes = other instanceof _Private_LocalSymbolTable;
269+
_Private_LocalSymbolTable otherLocal = otherHasPrivateAttributes ? (_Private_LocalSymbolTable) other : null;
270+
if (isSystemSymbolTable) {
271+
if (other.isSystemTable() && maxId == other.getMaxId()) {
272+
// Both represent the same system table.
273+
lastSupersetSymbolTable = other;
274+
return true;
275+
}
276+
// The other symbol table might not literally be the system symbol table, but if it's a local symbol table
277+
// with zero local symbols and zero imports, that counts.
278+
if (otherHasPrivateAttributes && otherLocal.getNumberOfLocalSymbols() == 0 && otherLocal.getImportedTablesAsList().isEmpty()) {
279+
lastSupersetSymbolTable = other;
280+
return true;
281+
}
282+
return false;
283+
}
284+
if (!otherHasPrivateAttributes) {
285+
// The reader's symbol table is not a system symbol table, but the other is. Other cannot be a superset.
286+
return false;
287+
}
288+
if (maxId > otherLocal.getMaxId()) return false;
289+
290+
// NOTE: the following uses of _Private_LocalSymbolTable utilize knowledge of the implementation used by
291+
// the binary writer, which has the only known use case for this method. Specifically, we call the interface
292+
// method variants that return lists instead of arrays because we know this matches the binary writer's symbol
293+
// table's internal representation and therefore does not require copying. If this method ends up being used
294+
// for other symbol table implementations, which is unlikely, we should add logic to choose the most efficient
295+
// variant to call for the particular implementation (such as by adding something like a `boolean usesArrays()`
296+
// method to the interface).
297+
298+
SymbolTable[] readerImports = imports.getImportedTablesNoCopy();
299+
if (!compareSymbolTableImportsArrayToList(readerImports, readerImports.length, otherLocal.getImportedTablesAsList())) {
300+
return false;
301+
}
302+
303+
// Superset extends subset if subset doesn't have any declared symbols.
304+
if (numberOfLocalSymbols == 0) {
305+
lastSupersetSymbolTable = other;
306+
return true;
307+
}
308+
309+
// Superset must have same/more declared (local) symbols than subset.
310+
if (numberOfLocalSymbols > otherLocal.getNumberOfLocalSymbols()) return false;
311+
312+
Collection<String> otherSymbols = otherLocal.getLocalSymbolsNoCopy();
313+
if (!compareSymbolsArrayToCollection(symbols, numberOfLocalSymbols, otherSymbols)) {
314+
return false;
315+
}
316+
317+
lastSupersetSymbolTable = other;
318+
return true;
319+
}
320+
209321
/**
210322
* Read-only snapshot of the local symbol table at the reader's current position.
211323
*/
@@ -431,6 +543,21 @@ public _Private_LocalSymbolTable makeCopy() {
431543
public SymbolTable[] getImportedTablesNoCopy() {
432544
return importedTables.getImportedTablesNoCopy();
433545
}
546+
547+
@Override
548+
public List<SymbolTable> getImportedTablesAsList() {
549+
throw new UnsupportedOperationException("Call getImportedTablesNoCopy() instead.");
550+
}
551+
552+
@Override
553+
public List<String> getLocalSymbolsNoCopy() {
554+
throw new UnsupportedOperationException("If this is needed, add a no-copy variant that returns an array.");
555+
}
556+
557+
@Override
558+
public int getNumberOfLocalSymbols() {
559+
return idToText.length;
560+
}
434561
}
435562

436563
/**
@@ -442,6 +569,7 @@ private void resetSymbolTable() {
442569
Arrays.fill(symbols, 0, localSymbolMaxOffset + 1, null);
443570
localSymbolMaxOffset = -1;
444571
cachedReadOnlySymbolTable = null;
572+
lastSupersetSymbolTable = null;
445573
}
446574

447575
/**
@@ -474,6 +602,7 @@ protected void restoreSymbolTable(SymbolTable symbolTable) {
474602
}
475603
localSymbolMaxOffset = snapshot.maxId - firstLocalSymbolId;
476604
System.arraycopy(snapshot.idToText, 0, symbols, 0, snapshot.idToText.length);
605+
lastSupersetSymbolTable = null;
477606
} else {
478607
// Note: this will only happen when `symbolTable` is the system symbol table.
479608
resetSymbolTable();
@@ -626,6 +755,9 @@ private void finishReadingSymbolTableStruct() {
626755
}
627756
localSymbolMaxOffset += newSymbols.size();
628757
}
758+
// Note: last superset table is reset even if new symbols were simply appended because there's no
759+
// guarantee those symbols are reflected in the superset table.
760+
lastSupersetSymbolTable = null;
629761
state = State.READING_VALUE;
630762
}
631763

src/main/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinary.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,11 @@ public boolean transferCurrentValue(_Private_ByteTransferSink writer) throws IOE
325325
return true;
326326
}
327327

328+
@Override
329+
public boolean isSymbolTableCompatible(SymbolTable symbolTable) {
330+
return isSymbolTableSubsetOf(symbolTable);
331+
}
332+
328333
@Override
329334
public <T> T asFacet(Class<T> facetType) {
330335
if (facetType == SpanProvider.class) {

src/main/java/com/amazon/ion/impl/LocalSymbolTable.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import com.amazon.ion.util.IonTextUtils;
3939
import java.io.IOException;
4040
import java.util.ArrayList;
41+
import java.util.Collection;
4142
import java.util.HashMap;
4243
import java.util.Iterator;
4344
import java.util.List;
@@ -610,6 +611,21 @@ public SymbolTable[] getImportedTablesNoCopy()
610611
return myImportsList.getImportedTablesNoCopy();
611612
}
612613

614+
@Override
615+
public List<SymbolTable> getImportedTablesAsList() {
616+
throw new UnsupportedOperationException("Call getImportedTablesNoCopy() instead.");
617+
}
618+
619+
@Override
620+
public Collection<String> getLocalSymbolsNoCopy() {
621+
throw new UnsupportedOperationException("If this is needed, add a no-copy variant that returns an array.");
622+
}
623+
624+
@Override
625+
public int getNumberOfLocalSymbols() {
626+
return mySymbolsCount;
627+
}
628+
613629
public void writeTo(IonWriter writer) throws IOException
614630
{
615631
IonReader reader = new SymbolTableReader(this);

src/main/java/com/amazon/ion/impl/_Private_ByteTransferReader.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
package com.amazon.ion.impl;
1717

1818
import com.amazon.ion.IonReader;
19+
import com.amazon.ion.SymbolTable;
20+
1921
import java.io.IOException;
2022

2123
/**
@@ -32,4 +34,12 @@ public interface _Private_ByteTransferReader
3234
*/
3335
public boolean transferCurrentValue(_Private_ByteTransferSink writer)
3436
throws IOException;
37+
38+
/**
39+
* Determines whether the reader's symbol table is compatible (i.e., a subset of) the given symbol table. When
40+
* true, values can be transferred from the reader to the writer verbatim.
41+
* @param symbolTable the symbol table active in the writer.
42+
* @return true if the reader's symbol table is compatible; otherwise, false.
43+
*/
44+
public boolean isSymbolTableCompatible(SymbolTable symbolTable);
3545
}

src/main/java/com/amazon/ion/impl/_Private_LocalSymbolTable.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22

33
import com.amazon.ion.SymbolTable;
44

5-
interface _Private_LocalSymbolTable extends SymbolTable {
5+
import java.util.Collection;
6+
import java.util.List;
7+
8+
public interface _Private_LocalSymbolTable extends SymbolTable {
69

710
/**
811
* @return a mutable copy of the symbol table.
@@ -22,4 +25,25 @@ interface _Private_LocalSymbolTable extends SymbolTable {
2225
* @see SymbolTable#getImportedTables()
2326
*/
2427
SymbolTable[] getImportedTablesNoCopy();
28+
29+
/**
30+
* Returns the imported symbol tables as a List without making a copy (if possible).
31+
* Like {@link #getImportedTables()}, the list does not include the system symbol table.
32+
*
33+
* @return the imported symbol tables. Does not include the system symbol table.
34+
*
35+
* @see SymbolTable#getImportedTables()
36+
*/
37+
List<SymbolTable> getImportedTablesAsList();
38+
39+
/**
40+
* Returns a collection containing the local symbols, without making a copy.
41+
* @return the local symbols.
42+
*/
43+
Collection<String> getLocalSymbolsNoCopy();
44+
45+
/**
46+
* @return the number of local symbols, which do not include the imported or system symbols.
47+
*/
48+
int getNumberOfLocalSymbols();
2549
}

src/main/java/com/amazon/ion/impl/bin/AbstractIonWriter.java

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,11 @@
2929
}
3030

3131
/** The cache for copy optimization checks--null if not copy optimized. */
32-
private final _Private_SymtabExtendsCache symtabExtendsCache;
32+
private final boolean isStreamCopyOptimized;
3333

3434
/*package*/ AbstractIonWriter(final WriteValueOptimization optimization)
3535
{
36-
this.symtabExtendsCache = optimization == WriteValueOptimization.COPY_OPTIMIZED
37-
? new _Private_SymtabExtendsCache() : null;
36+
this.isStreamCopyOptimized = optimization == WriteValueOptimization.COPY_OPTIMIZED;
3837
}
3938

4039
public final void writeValue(final IonValue value) throws IOException
@@ -56,11 +55,10 @@ public final void writeValue(final IonReader reader) throws IOException
5655

5756
if (isStreamCopyOptimized() && reader instanceof _Private_ByteTransferReader)
5857
{
59-
if (_Private_Utils.isNonSymbolScalar(type)
60-
|| symtabExtendsCache.symtabsCompat(getSymbolTable(), reader.getSymbolTable()))
58+
_Private_ByteTransferReader byteTransferReader = (_Private_ByteTransferReader) reader;
59+
if (_Private_Utils.isNonSymbolScalar(type) || byteTransferReader.isSymbolTableCompatible(getSymbolTable()))
6160
{
62-
// we have something we can pipe over
63-
if (((_Private_ByteTransferReader) reader).transferCurrentValue(this)) {
61+
if (byteTransferReader.transferCurrentValue(this)) {
6462
return;
6563
}
6664
}
@@ -220,7 +218,7 @@ public final void writeTimestampUTC(final Date value) throws IOException
220218

221219
public final boolean isStreamCopyOptimized()
222220
{
223-
return symtabExtendsCache != null;
221+
return isStreamCopyOptimized;
224222
}
225223

226224
@SuppressWarnings("deprecation")

src/main/java/com/amazon/ion/impl/bin/IonManagedBinaryWriter.java

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import com.amazon.ion.SymbolToken;
3838
import com.amazon.ion.Timestamp;
3939
import com.amazon.ion.UnknownSymbolException;
40+
import com.amazon.ion.impl._Private_LocalSymbolTable;
4041
import com.amazon.ion.impl.bin.IonRawBinaryWriter.StreamCloseMode;
4142
import com.amazon.ion.impl.bin.IonRawBinaryWriter.StreamFlushMode;
4243
import java.io.IOException;
@@ -45,6 +46,7 @@
4546
import java.math.BigInteger;
4647
import java.util.ArrayList;
4748
import java.util.Arrays;
49+
import java.util.Collection;
4850
import java.util.Collections;
4951
import java.util.HashMap;
5052
import java.util.Iterator;
@@ -548,7 +550,7 @@ public void writeInt(IonManagedBinaryWriter self, BigInteger value) throws IOExc
548550
private static final SymbolTable[] EMPTY_SYMBOL_TABLE_ARRAY = new SymbolTable[0];
549551

550552
/** View over the internal local symbol table state as a symbol table. */
551-
private class LocalSymbolTableView extends AbstractSymbolTable
553+
private class LocalSymbolTableView extends AbstractSymbolTable implements _Private_LocalSymbolTable
552554
{
553555
public LocalSymbolTableView()
554556
{
@@ -637,6 +639,33 @@ public void makeReadOnly()
637639
{
638640
localsLocked = true;
639641
}
642+
643+
@Override
644+
public _Private_LocalSymbolTable makeCopy() {
645+
throw new UnsupportedOperationException();
646+
}
647+
648+
@Override
649+
public SymbolTable[] getImportedTablesNoCopy() {
650+
throw new UnsupportedOperationException("Call getImportedTablesAsList() instead.");
651+
}
652+
653+
@Override
654+
public List<SymbolTable> getImportedTablesAsList() {
655+
return imports.parents;
656+
}
657+
658+
@Override
659+
public Collection<String> getLocalSymbolsNoCopy() {
660+
// Note: this is correct because `locals` is a LinkedHashMap, which orders keys in order of insertion.
661+
// Therefore, the returned collection will return symbols in symbol ID order.
662+
return locals.keySet();
663+
}
664+
665+
@Override
666+
public int getNumberOfLocalSymbols() {
667+
return locals.size();
668+
}
640669
}
641670

642671
private final IonCatalog catalog;

0 commit comments

Comments
 (0)