Skip to content

Commit ecb9f91

Browse files
committed
Tidy code
1 parent 515ddca commit ecb9f91

File tree

4 files changed

+183
-139
lines changed

4 files changed

+183
-139
lines changed

lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,8 @@ private void parse() throws IOException {
448448
}
449449

450450
// Interleaves all output tokens onto the futureOutputs:
451-
private void addOutput(BytesRef bytes, int matchInputLength, int matchEndOffset) throws IOException {
451+
private void addOutput(BytesRef bytes, int matchInputLength, int matchEndOffset)
452+
throws IOException {
452453
bytesReader.reset(bytes.bytes, bytes.offset, bytes.length);
453454

454455
final int code = bytesReader.readVInt();

lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilterFactory.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,8 @@ public SynonymGraphFilterFactory(Map<String, String> args) {
116116
}
117117
}
118118
String compiledSynonymsPathArg = get(args, "compiledSynonymsPath");
119-
compiledSynonymsPath = compiledSynonymsPathArg == null ? null : Path.of(compiledSynonymsPathArg);
119+
compiledSynonymsPath =
120+
compiledSynonymsPathArg == null ? null : Path.of(compiledSynonymsPathArg);
120121
if (!args.isEmpty()) {
121122
throw new IllegalArgumentException("Unknown parameters: " + args);
122123
}
@@ -178,18 +179,18 @@ protected SynonymMap loadSynonyms(
178179
}
179180
if (compiledSynonymsDirectory == null || compiledSynonymsDirectory.hasSynonyms() == false) {
180181
CharsetDecoder decoder =
181-
StandardCharsets.UTF_8
182-
.newDecoder()
183-
.onMalformedInput(CodingErrorAction.REPORT)
184-
.onUnmappableCharacter(CodingErrorAction.REPORT);
182+
StandardCharsets.UTF_8
183+
.newDecoder()
184+
.onMalformedInput(CodingErrorAction.REPORT)
185+
.onUnmappableCharacter(CodingErrorAction.REPORT);
185186

186187
SynonymMap.Parser parser;
187188
Class<? extends SynonymMap.Parser> clazz = loader.findClass(cname, SynonymMap.Parser.class);
188189
try {
189190
parser =
190-
clazz
191-
.getConstructor(boolean.class, boolean.class, Analyzer.class)
192-
.newInstance(dedup, expand, analyzer);
191+
clazz
192+
.getConstructor(boolean.class, boolean.class, Analyzer.class)
193+
.newInstance(dedup, expand, analyzer);
193194
} catch (Exception e) {
194195
throw new RuntimeException(e);
195196
}

lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -224,12 +224,15 @@ public SynonymMap build() throws IOException {
224224
return build(null);
225225
}
226226

227-
/** Builds an {@link SynonymMap} and returns it.
228-
* If directory is non-null, it will write the compiled SynonymMap to disk and return an off-heap version. */
227+
/**
228+
* Builds a {@link SynonymMap} and returns it. If directory is non-null, it will write the
229+
* compiled SynonymMap to disk and return an off-heap version.
230+
*/
229231
public SynonymMap build(SynonymMapDirectory directory) throws IOException {
230232
ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
231233
// TODO: are we using the best sharing options?
232-
FSTCompiler.Builder<BytesRef> fstCompilerBuilder = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs);
234+
FSTCompiler.Builder<BytesRef> fstCompilerBuilder =
235+
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs);
233236
IndexOutput fstOutput = null;
234237
if (directory != null) {
235238
fstOutput = directory.fstOutput();
@@ -316,17 +319,18 @@ public SynonymMap build(SynonymMapDirectory directory) throws IOException {
316319
directory.writeMetadata(words.size(), maxHorizontalContext, fst);
317320
return directory.readMap();
318321
}
319-
BytesRefHashLike wordsLike = new BytesRefHashLike() {
320-
@Override
321-
public void get(int id, BytesRef scratch) {
322-
words.get(id, scratch);
323-
}
324-
};
322+
BytesRefHashLike wordsLike =
323+
new BytesRefHashLike() {
324+
@Override
325+
public void get(int id, BytesRef scratch) {
326+
words.get(id, scratch);
327+
}
328+
};
325329
return new SynonymMap(fst, wordsLike, maxHorizontalContext);
326330
}
327331
}
328332

329-
static abstract class BytesRefHashLike {
333+
abstract static class BytesRefHashLike {
330334
public abstract void get(int id, BytesRef scratch) throws IOException;
331335
}
332336

lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMapDirectory.java

Lines changed: 158 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -1,147 +1,185 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
118
package org.apache.lucene.analysis.synonym;
219

20+
import java.io.Closeable;
21+
import java.io.IOException;
22+
import java.nio.file.Path;
23+
import java.util.List;
324
import org.apache.lucene.store.Directory;
425
import org.apache.lucene.store.FSDirectory;
526
import org.apache.lucene.store.IOContext;
627
import org.apache.lucene.store.IndexInput;
728
import org.apache.lucene.store.IndexOutput;
8-
import org.apache.lucene.util.ArrayUtil;
929
import org.apache.lucene.util.BytesRef;
10-
import org.apache.lucene.util.BytesRefBuilder;
1130
import org.apache.lucene.util.fst.ByteSequenceOutputs;
1231
import org.apache.lucene.util.fst.FST;
1332
import org.apache.lucene.util.fst.OffHeapFSTStore;
1433

15-
import java.io.Closeable;
16-
import java.io.IOException;
17-
import java.nio.file.Path;
18-
import java.util.List;
19-
34+
/**
35+
* Wraps an {@link FSDirectory} to read and write a compiled {@link SynonymMap}. When reading, the
36+
* FST and output words are kept off-heap.
37+
*/
2038
public class SynonymMapDirectory implements Closeable {
21-
private final SynonymMapFormat synonymMapFormat = new SynonymMapFormat(); // TODO -- Should this be more flexible/codec-like? Less?
22-
private final Directory directory;
23-
public SynonymMapDirectory(Path path) throws IOException {
24-
directory = FSDirectory.open(path);
25-
}
26-
27-
public IndexOutput fstOutput() throws IOException {
28-
return synonymMapFormat.getFSTOutput(directory);
39+
private final SynonymMapFormat synonymMapFormat =
40+
new SynonymMapFormat(); // TODO -- Should this be more flexible/codec-like? Less?
41+
private final Directory directory;
42+
43+
public SynonymMapDirectory(Path path) throws IOException {
44+
directory = FSDirectory.open(path);
45+
}
46+
47+
public IndexOutput fstOutput() throws IOException {
48+
return synonymMapFormat.getFSTOutput(directory);
49+
}
50+
51+
public WordsOutput wordsOutput() throws IOException {
52+
return synonymMapFormat.getWordsOutput(directory);
53+
}
54+
55+
public void writeMetadata(int wordCount, int maxHorizontalContext, FST<BytesRef> fst)
56+
throws IOException {
57+
synonymMapFormat.writeMetadata(directory, wordCount, maxHorizontalContext, fst);
58+
}
59+
60+
public SynonymMap readMap() throws IOException {
61+
return synonymMapFormat.readSynonymMap(directory);
62+
}
63+
64+
public boolean hasSynonyms() throws IOException {
65+
// TODO should take the path to the synonyms file to compare file hash against file used to
66+
// build the directory
67+
return directory.listAll().length > 0;
68+
}
69+
70+
@Override
71+
public void close() throws IOException {
72+
directory.close();
73+
}
74+
75+
/**
76+
* Abstraction to support writing individual output words to the directory. Should be closed after
77+
* the last word is written.
78+
*/
79+
public abstract static class WordsOutput implements Closeable {
80+
public abstract void addWord(BytesRef word) throws IOException;
81+
}
82+
83+
private static class SynonymMapFormat {
84+
private static final String FST_FILE = "synonyms.fst";
85+
private static final String WORDS_FILE = "synonyms.wrd";
86+
private static final String METADATA_FILE = "synonyms.mdt";
87+
88+
public IndexOutput getFSTOutput(Directory directory) throws IOException {
89+
return directory.createOutput(FST_FILE, IOContext.DEFAULT);
2990
}
3091

31-
public WordsOutput wordsOutput() throws IOException {
32-
return synonymMapFormat.getWordsOutput(directory);
33-
}
34-
35-
public void writeMetadata(int wordCount, int maxHorizontalContext, FST<BytesRef> fst) throws IOException {
36-
synonymMapFormat.writeMetadata(directory, wordCount, maxHorizontalContext, fst);
37-
}
92+
public WordsOutput getWordsOutput(Directory directory) throws IOException {
93+
IndexOutput wordsOutput = directory.createOutput(WORDS_FILE, IOContext.DEFAULT);
94+
return new WordsOutput() {
95+
@Override
96+
public void close() throws IOException {
97+
wordsOutput.close();
98+
}
3899

39-
public SynonymMap readMap() throws IOException {
40-
return synonymMapFormat.readSynonymMap(directory);
100+
@Override
101+
public void addWord(BytesRef word) throws IOException {
102+
wordsOutput.writeVInt(word.length);
103+
wordsOutput.writeBytes(word.bytes, word.offset, word.length);
104+
}
105+
};
41106
}
42-
43-
public boolean hasSynonyms() throws IOException {
44-
// TODO should take the path to the synonyms file to compare file hash against file used to build the directory
45-
return directory.listAll().length > 0;
107+
;
108+
109+
public void writeMetadata(
110+
Directory directory, int wordCount, int maxHorizontalContext, FST<BytesRef> fst)
111+
throws IOException {
112+
try (IndexOutput metadataOutput = directory.createOutput(METADATA_FILE, IOContext.DEFAULT)) {
113+
metadataOutput.writeVInt(wordCount);
114+
metadataOutput.writeVInt(maxHorizontalContext);
115+
fst.saveMetadata(metadataOutput);
116+
}
117+
directory.sync(List.of(FST_FILE, WORDS_FILE, METADATA_FILE));
46118
}
47119

48-
@Override
49-
public void close() throws IOException {
50-
directory.close();
120+
private SynonymMetadata readMetadata(Directory directory) throws IOException {
121+
try (IndexInput metadataInput = directory.openInput(METADATA_FILE, IOContext.READONCE)) {
122+
int wordCount = metadataInput.readVInt();
123+
int maxHorizontalContext = metadataInput.readVInt();
124+
FST.FSTMetadata<BytesRef> fstMetadata =
125+
FST.readMetadata(metadataInput, ByteSequenceOutputs.getSingleton());
126+
return new SynonymMetadata(wordCount, maxHorizontalContext, fstMetadata);
127+
}
51128
}
52129

53-
public static abstract class WordsOutput implements Closeable {
54-
public abstract void addWord(BytesRef word) throws IOException;
130+
public SynonymMap readSynonymMap(Directory directory) throws IOException {
131+
SynonymMetadata synonymMetadata = readMetadata(directory);
132+
FST<BytesRef> fst =
133+
new FST<>(
134+
synonymMetadata.fstMetadata,
135+
directory.openInput(FST_FILE, IOContext.DEFAULT),
136+
new OffHeapFSTStore());
137+
IndexInput wordsInput = directory.openInput(WORDS_FILE, IOContext.READ);
138+
int[] bytesStartArray = new int[synonymMetadata.wordCount];
139+
for (int i = 0; i < synonymMetadata.wordCount; i++) {
140+
bytesStartArray[i] = Math.toIntExact(wordsInput.getFilePointer());
141+
int length = wordsInput.readVInt();
142+
wordsInput.seek(wordsInput.getFilePointer() + length);
143+
}
144+
return new SynonymMap(
145+
fst,
146+
new OffHeapBytesRefHashLike(bytesStartArray, wordsInput),
147+
synonymMetadata.maxHorizontalContext);
55148
}
56149

57-
private static class SynonymMapFormat {
58-
private static final String FST_FILE = "synonyms.fst";
59-
private static final String WORDS_FILE = "synonyms.wrd";
60-
private static final String METADATA_FILE = "synonyms.mdt";
61-
62-
public IndexOutput getFSTOutput(Directory directory) throws IOException {
63-
return directory.createOutput(FST_FILE, IOContext.DEFAULT);
64-
}
65-
66-
public WordsOutput getWordsOutput(Directory directory) throws IOException {
67-
IndexOutput wordsOutput = directory.createOutput(WORDS_FILE, IOContext.DEFAULT);
68-
return new WordsOutput() {
69-
@Override
70-
public void close() throws IOException {
71-
wordsOutput.close();
72-
}
73-
74-
@Override
75-
public void addWord(BytesRef word) throws IOException {
76-
wordsOutput.writeVInt(word.length);
77-
wordsOutput.writeBytes(word.bytes, word.offset, word.length);
78-
}
79-
};
80-
};
81-
82-
public void writeMetadata(Directory directory, int wordCount, int maxHorizontalContext, FST<BytesRef> fst) throws IOException {
83-
try (IndexOutput metadataOutput = directory.createOutput(METADATA_FILE, IOContext.DEFAULT)) {
84-
metadataOutput.writeVInt(wordCount);
85-
metadataOutput.writeVInt(maxHorizontalContext);
86-
fst.saveMetadata(metadataOutput);
87-
}
88-
directory.sync(List.of(FST_FILE, WORDS_FILE, METADATA_FILE));
89-
}
90-
91-
private SynonymMetadata readMetadata(Directory directory) throws IOException {
92-
try (IndexInput metadataInput = directory.openInput(METADATA_FILE, IOContext.READONCE)) {
93-
int wordCount = metadataInput.readVInt();
94-
int maxHorizontalContext = metadataInput.readVInt();
95-
FST.FSTMetadata<BytesRef> fstMetadata = FST.readMetadata(metadataInput, ByteSequenceOutputs.getSingleton());
96-
return new SynonymMetadata(wordCount, maxHorizontalContext, fstMetadata);
97-
}
150+
private static class OffHeapBytesRefHashLike extends SynonymMap.BytesRefHashLike {
151+
private final int[] bytesStartArray;
152+
private final IndexInput wordsFile;
153+
154+
public OffHeapBytesRefHashLike(int[] bytesStartArray, IndexInput wordsFile) {
155+
this.bytesStartArray = bytesStartArray;
156+
this.wordsFile = wordsFile;
157+
}
158+
159+
@Override
160+
public void get(int id, BytesRef scratch) throws IOException {
161+
wordsFile.seek(bytesStartArray[id]);
162+
int length = wordsFile.readVInt();
163+
if (scratch.bytes.length < length) {
164+
scratch.bytes = new byte[length];
98165
}
166+
wordsFile.readBytes(scratch.bytes, 0, length);
167+
scratch.offset = 0;
168+
scratch.length = length;
169+
}
170+
}
99171

100-
public SynonymMap readSynonymMap(Directory directory) throws IOException {
101-
SynonymMetadata synonymMetadata = readMetadata(directory);
102-
FST<BytesRef> fst = new FST<>(synonymMetadata.fstMetadata, directory.openInput(FST_FILE, IOContext.DEFAULT), new OffHeapFSTStore());
103-
IndexInput wordsInput = directory.openInput(WORDS_FILE, IOContext.READ);
104-
int[] bytesStartArray = new int[synonymMetadata.wordCount];
105-
for (int i = 0; i < synonymMetadata.wordCount; i++) {
106-
bytesStartArray[i] = Math.toIntExact(wordsInput.getFilePointer());
107-
int length = wordsInput.readVInt();
108-
wordsInput.seek(wordsInput.getFilePointer() + length);
109-
}
110-
return new SynonymMap(fst, new OffHeapBytesRefHashLike(bytesStartArray, wordsInput), synonymMetadata.maxHorizontalContext);
111-
}
112-
113-
private static class OffHeapBytesRefHashLike extends SynonymMap.BytesRefHashLike {
114-
private final int[] bytesStartArray;
115-
private final IndexInput wordsFile;
116-
117-
public OffHeapBytesRefHashLike(int[] bytesStartArray, IndexInput wordsFile) {
118-
this.bytesStartArray = bytesStartArray;
119-
this.wordsFile = wordsFile;
120-
}
121-
122-
@Override
123-
public void get(int id, BytesRef scratch) throws IOException {
124-
wordsFile.seek(bytesStartArray[id]);
125-
int length = wordsFile.readVInt();
126-
if (scratch.bytes.length < length) {
127-
scratch.bytes = new byte[length];
128-
}
129-
wordsFile.readBytes(scratch.bytes, 0, length);
130-
scratch.offset = 0;
131-
scratch.length = length;
132-
}
133-
}
134-
135-
private static class SynonymMetadata {
136-
final int wordCount;
137-
final int maxHorizontalContext;
138-
final FST.FSTMetadata<BytesRef> fstMetadata;
139-
140-
SynonymMetadata(int wordCount, int maxHorizontalContext, FST.FSTMetadata<BytesRef> fstMetadata) {
141-
this.wordCount = wordCount;
142-
this.maxHorizontalContext = maxHorizontalContext;
143-
this.fstMetadata = fstMetadata;
144-
}
145-
}
172+
private static class SynonymMetadata {
173+
final int wordCount;
174+
final int maxHorizontalContext;
175+
final FST.FSTMetadata<BytesRef> fstMetadata;
176+
177+
SynonymMetadata(
178+
int wordCount, int maxHorizontalContext, FST.FSTMetadata<BytesRef> fstMetadata) {
179+
this.wordCount = wordCount;
180+
this.maxHorizontalContext = maxHorizontalContext;
181+
this.fstMetadata = fstMetadata;
182+
}
146183
}
184+
}
147185
}

0 commit comments

Comments
 (0)