Skip to content

Commit 701619d

Browse files
authored
Lazily write the FST padding byte (#12981)
* lazily write the FST padding byte * Also write the pad byte when there is emptyOutput * add comment * Add more comments
1 parent 09837ba commit 701619d

File tree

1 file changed

+25
-5
lines changed

1 file changed

+25
-5
lines changed

lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ public class FSTCompiler<T> {
106106

107107
private final IntsRefBuilder lastInput = new IntsRefBuilder();
108108

109+
// indicates whether the padding byte has not yet been written
110+
private boolean paddingBytePending;
111+
109112
// NOTE: cutting this over to ArrayList instead loses ~6%
110113
// in build performance on 9.8M Wikipedia terms; so we
111114
// left this as an array:
@@ -160,15 +163,14 @@ private FSTCompiler(
160163
boolean allowFixedLengthArcs,
161164
DataOutput dataOutput,
162165
float directAddressingMaxOversizingFactor,
163-
int version)
164-
throws IOException {
166+
int version) {
165167
this.allowFixedLengthArcs = allowFixedLengthArcs;
166168
this.directAddressingMaxOversizingFactor = directAddressingMaxOversizingFactor;
167169
this.version = version;
168170
// pad: ensure no node gets address 0 which is reserved to mean
169-
// the stop state w/ no arcs
170-
dataOutput.writeByte((byte) 0);
171+
// the stop state w/ no arcs. the actual byte will be written lazily
171172
numBytesWritten++;
173+
paddingBytePending = true;
172174
this.dataOutput = dataOutput;
173175
fst =
174176
new FST<>(
@@ -340,7 +342,7 @@ public Builder<T> setVersion(int version) {
340342
}
341343

342344
/** Creates a new {@link FSTCompiler}. */
343-
public FSTCompiler<T> build() throws IOException {
345+
public FSTCompiler<T> build() {
344346
// create a default DataOutput if not specified
345347
if (dataOutput == null) {
346348
dataOutput = getOnHeapReaderWriter(15);
@@ -548,13 +550,27 @@ long addNode(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
548550
}
549551

550552
reverseScratchBytes();
553+
// write the padding byte if needed
554+
if (paddingBytePending) {
555+
writePaddingByte();
556+
}
551557
scratchBytes.writeTo(dataOutput);
552558
numBytesWritten += scratchBytes.getPosition();
553559

554560
nodeCount++;
555561
return numBytesWritten - 1;
556562
}
557563

564+
/**
565+
* Write the padding byte, ensuring no node gets address 0, which is reserved to mean the stop state
566+
* w/ no arcs
567+
*/
568+
private void writePaddingByte() throws IOException {
569+
assert paddingBytePending;
570+
dataOutput.writeByte((byte) 0);
571+
paddingBytePending = false;
572+
}
573+
558574
private void writeLabel(DataOutput out, int v) throws IOException {
559575
assert v >= 0 : "v=" + v;
560576
if (fst.metadata.inputType == INPUT_TYPE.BYTE1) {
@@ -963,7 +979,11 @@ public FST<T> compile() throws IOException {
963979
freezeTail(0);
964980
if (root.numArcs == 0) {
965981
if (fst.metadata.emptyOutput == null) {
982+
// return null for completely empty FST which accepts nothing
966983
return null;
984+
} else {
985+
// we haven't written the padding byte so far, but the FST is still valid
986+
writePaddingByte();
967987
}
968988
}
969989

0 commit comments

Comments
 (0)