@@ -106,6 +106,9 @@ public class FSTCompiler<T> {
106
106
107
107
private final IntsRefBuilder lastInput = new IntsRefBuilder ();
108
108
109
+ // indicates whether the padding byte has not been written yet
110
+ private boolean paddingBytePending ;
111
+
109
112
// NOTE: cutting this over to ArrayList instead loses ~6%
110
113
// in build performance on 9.8M Wikipedia terms; so we
111
114
// left this as an array:
@@ -160,15 +163,14 @@ private FSTCompiler(
160
163
boolean allowFixedLengthArcs ,
161
164
DataOutput dataOutput ,
162
165
float directAddressingMaxOversizingFactor ,
163
- int version )
164
- throws IOException {
166
+ int version ) {
165
167
this .allowFixedLengthArcs = allowFixedLengthArcs ;
166
168
this .directAddressingMaxOversizingFactor = directAddressingMaxOversizingFactor ;
167
169
this .version = version ;
168
170
// pad: ensure no node gets address 0 which is reserved to mean
169
- // the stop state w/ no arcs
170
- dataOutput .writeByte ((byte ) 0 );
171
+ // the stop state w/ no arcs. The byte is only counted here; it is actually written lazily
171
172
numBytesWritten ++;
173
+ paddingBytePending = true ;
172
174
this .dataOutput = dataOutput ;
173
175
fst =
174
176
new FST <>(
@@ -340,7 +342,7 @@ public Builder<T> setVersion(int version) {
340
342
}
341
343
342
344
/** Creates a new {@link FSTCompiler}. */
343
- public FSTCompiler <T > build () throws IOException {
345
+ public FSTCompiler <T > build () {
344
346
// create a default DataOutput if not specified
345
347
if (dataOutput == null ) {
346
348
dataOutput = getOnHeapReaderWriter (15 );
@@ -548,13 +550,27 @@ long addNode(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
548
550
}
549
551
550
552
reverseScratchBytes ();
553
+ // write the padding byte if needed
554
+ if (paddingBytePending ) {
555
+ writePaddingByte ();
556
+ }
551
557
scratchBytes .writeTo (dataOutput );
552
558
numBytesWritten += scratchBytes .getPosition ();
553
559
554
560
nodeCount ++;
555
561
return numBytesWritten - 1 ;
556
562
}
557
563
564
+ /**
565
+ * Writes the single zero padding byte, so that no node gets address 0, which is reserved to mean
566
+ * the stop state w/ no arcs. Must only be called while the padding byte is still pending.
567
+ */
568
+ private void writePaddingByte () throws IOException {
569
+ assert paddingBytePending ;
570
+ dataOutput .writeByte ((byte ) 0 );
571
+ paddingBytePending = false ;
572
+ }
573
+
558
574
private void writeLabel (DataOutput out , int v ) throws IOException {
559
575
assert v >= 0 : "v=" + v ;
560
576
if (fst .metadata .inputType == INPUT_TYPE .BYTE1 ) {
@@ -963,7 +979,11 @@ public FST<T> compile() throws IOException {
963
979
freezeTail (0 );
964
980
if (root .numArcs == 0 ) {
965
981
if (fst .metadata .emptyOutput == null ) {
982
+ // return null for completely empty FST which accepts nothing
966
983
return null ;
984
+ } else {
985
+ // we haven't written the padding byte so far, but the FST is still valid
986
+ writePaddingByte ();
967
987
}
968
988
}
969
989
0 commit comments