Bypass HNSW graph building for tiny segments #14963


Draft · wants to merge 1 commit into main
Lucene99HnswScalarQuantizedVectorsFormat.java
@@ -65,9 +65,25 @@ public class Lucene99HnswScalarQuantizedVectorsFormat extends KnnVectorsFormat {
private final int numMergeWorkers;
private final TaskExecutor mergeExec;

/**
* Whether to bypass HNSW graph building for tiny segments (below {@link
* Lucene99HnswVectorsFormat#HNSW_GRAPH_THRESHOLD}). When enabled, segments with fewer than the
* threshold number of vectors will store only flat vectors with quantization, significantly
* improving indexing performance for workloads with frequent flushes.
*/
private final boolean bypassTinySegments;

/** Constructs a format using default graph construction parameters with 7 bit quantization */
public Lucene99HnswScalarQuantizedVectorsFormat() {
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, 7, false, null, null);
this(
DEFAULT_MAX_CONN,
DEFAULT_BEAM_WIDTH,
DEFAULT_NUM_MERGE_WORKER,
7,
false,
null,
null,
false);
}

/**
@@ -77,7 +93,19 @@ public Lucene99HnswScalarQuantizedVectorsFormat() {
* @param beamWidth the size of the queue maintained during graph construction.
*/
public Lucene99HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth) {
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, 7, false, null, null);
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, 7, false, null, null, false);
}

/**
* Constructs a format using the given graph construction parameters with 7 bit quantization
*
* @param maxConn the maximum number of connections to a node in the HNSW graph
* @param beamWidth the size of the queue maintained during graph construction.
* @param bypassTinySegments whether to bypass HNSW graph building for tiny segments
*/
public Lucene99HnswScalarQuantizedVectorsFormat(
int maxConn, int beamWidth, boolean bypassTinySegments) {
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, 7, false, null, null, bypassTinySegments);
}

/**
@@ -107,6 +135,40 @@ public Lucene99HnswScalarQuantizedVectorsFormat(
boolean compress,
Float confidenceInterval,
ExecutorService mergeExec) {
this(maxConn, beamWidth, numMergeWorkers, bits, compress, confidenceInterval, mergeExec, false);
}

/**
* Constructs a format using the given graph construction parameters and scalar quantization.
*
* @param maxConn the maximum number of connections to a node in the HNSW graph
* @param beamWidth the size of the queue maintained during graph construction.
* @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
* larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
* @param bits the number of bits to use for scalar quantization (must be 4 or 7)
* @param compress whether to compress the quantized vectors by another 50% when bits=4. If
* `true`, pairs of (4 bit quantized) dimensions are packed into a single byte. This must be
* `false` when bits=7. This provides a trade-off of 50% reduction in hot vector memory usage
* during searching, at some decode speed penalty.
* @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
* it is calculated based on the vector field dimensions. When `0`, the quantiles are
* dynamically determined by sampling many confidence intervals and determining the most
* accurate pair.
* @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
* generated by this format to do the merge
* @param bypassTinySegments whether to bypass HNSW graph building for tiny segments (below {@link
* Lucene99HnswVectorsFormat#HNSW_GRAPH_THRESHOLD} vectors). When enabled, improves indexing
* performance for workloads with frequent flushes.
*/
public Lucene99HnswScalarQuantizedVectorsFormat(
int maxConn,
int beamWidth,
int numMergeWorkers,
int bits,
boolean compress,
Float confidenceInterval,
ExecutorService mergeExec,
boolean bypassTinySegments) {
super(NAME);
if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
throw new IllegalArgumentException(
@@ -124,6 +186,7 @@ public Lucene99HnswScalarQuantizedVectorsFormat(
}
this.maxConn = maxConn;
this.beamWidth = beamWidth;
this.bypassTinySegments = bypassTinySegments;
if (numMergeWorkers == 1 && mergeExec != null) {
throw new IllegalArgumentException(
"No executor service is needed as we'll use single thread to merge");
@@ -146,7 +209,8 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException
beamWidth,
flatVectorsFormat.fieldsWriter(state),
numMergeWorkers,
mergeExec);
mergeExec,
bypassTinySegments);
}

@Override
@@ -165,6 +229,8 @@ public String toString() {
+ maxConn
+ ", beamWidth="
+ beamWidth
+ ", bypassTinySegments="
+ bypassTinySegments
+ ", flatVectorFormat="
+ flatVectorsFormat
+ ")";
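For context, a minimal construction sketch for the quantized variant (the three-argument overload is the one this PR adds; DEFAULT_MAX_CONN and DEFAULT_BEAM_WIDTH are the existing defaults defined on Lucene99HnswVectorsFormat):

```java
// Sketch: default graph parameters, 7 bit quantization, bypass flag enabled.
// Segments below HNSW_GRAPH_THRESHOLD vectors skip graph building at flush.
KnnVectorsFormat format =
    new Lucene99HnswScalarQuantizedVectorsFormat(
        Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,   // 16
        Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH, // 100
        /* bypassTinySegments= */ true);
```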
Lucene99HnswVectorsFormat.java
@@ -115,6 +115,13 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat {
/** Default to use single thread merge */
public static final int DEFAULT_NUM_MERGE_WORKER = 1;

/**
* Threshold below which HNSW graph building is bypassed for tiny segments. Segments with fewer
* vectors will use flat storage only, improving indexing performance when flushes are
* frequent.
*/
public static final int HNSW_GRAPH_THRESHOLD = 10_000;

static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16;

/**
@@ -137,9 +144,16 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat {
private final int numMergeWorkers;
private final TaskExecutor mergeExec;

/**
* Whether to bypass HNSW graph building for tiny segments (below {@link #HNSW_GRAPH_THRESHOLD}).
* When enabled, segments with fewer than the threshold number of vectors will store only flat
* vectors, significantly improving indexing performance for workloads with frequent flushes.
*/
private final boolean bypassTinySegments;
Review comment (Member):
If we allow this to be a parameter, it should be a threshold that refers to the typical k used when querying.

Reply (Contributor, Author):
Makes sense

/** Constructs a format using default graph construction parameters */
public Lucene99HnswVectorsFormat() {
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, null);
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, null, false);
}

/**
@@ -149,11 +163,22 @@ public Lucene99HnswVectorsFormat() {
* @param beamWidth the size of the queue maintained during graph construction.
*/
public Lucene99HnswVectorsFormat(int maxConn, int beamWidth) {
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null);
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null, false);
}

/**
* Constructs a format using the given graph construction parameters and scalar quantization.
* Constructs a format using the given graph construction parameters.
*
* @param maxConn the maximum number of connections to a node in the HNSW graph
* @param beamWidth the size of the queue maintained during graph construction.
* @param bypassTinySegments whether to bypass HNSW graph building for tiny segments
*/
public Lucene99HnswVectorsFormat(int maxConn, int beamWidth, boolean bypassTinySegments) {
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null, bypassTinySegments);
}

/**
* Constructs a format using the given graph construction parameters.
*
* @param maxConn the maximum number of connections to a node in the HNSW graph
* @param beamWidth the size of the queue maintained during graph construction.
@@ -165,6 +190,29 @@ public Lucene99HnswVectorsFormat(int maxConn, int beamWidth) {
*/
public Lucene99HnswVectorsFormat(
int maxConn, int beamWidth, int numMergeWorkers, ExecutorService mergeExec) {
this(maxConn, beamWidth, numMergeWorkers, mergeExec, false);
}

/**
* Constructs a format using the given graph construction parameters.
*
* @param maxConn the maximum number of connections to a node in the HNSW graph
* @param beamWidth the size of the queue maintained during graph construction.
* @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
* larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
* @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
* generated by this format to do the merge. If null, the configured {@link
* MergeScheduler#getIntraMergeExecutor(MergePolicy.OneMerge)} is used.
* @param bypassTinySegments whether to bypass HNSW graph building for tiny segments (below {@link
* #HNSW_GRAPH_THRESHOLD} vectors). When enabled, improves indexing performance for workloads
* with frequent flushes.
*/
public Lucene99HnswVectorsFormat(
int maxConn,
int beamWidth,
int numMergeWorkers,
ExecutorService mergeExec,
boolean bypassTinySegments) {
super("Lucene99HnswVectorsFormat");
if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
throw new IllegalArgumentException(
@@ -182,6 +230,7 @@ public Lucene99HnswVectorsFormat(
}
this.maxConn = maxConn;
this.beamWidth = beamWidth;
this.bypassTinySegments = bypassTinySegments;
if (numMergeWorkers == 1 && mergeExec != null) {
throw new IllegalArgumentException(
"No executor service is needed as we'll use single thread to merge");
@@ -202,12 +251,14 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException
beamWidth,
flatVectorsFormat.fieldsWriter(state),
numMergeWorkers,
mergeExec);
mergeExec,
bypassTinySegments);
}

@Override
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state));
return new Lucene99HnswVectorsReader(
state, flatVectorsFormat.fieldsReader(state), bypassTinySegments);
}

@Override
@@ -221,6 +272,8 @@ public String toString() {
+ maxConn
+ ", beamWidth="
+ beamWidth
+ ", bypassTinySegments="
+ bypassTinySegments
+ ", flatVectorFormat="
+ flatVectorsFormat
+ ")";
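A sketch of wiring the flag into an index through the standard per-field override hook (Lucene101Codec is an assumption about the codec on the target branch; substitute the current one):

```java
// Sketch: route a vector field to the HNSW format with bypass enabled.
// 16 and 100 are the documented default maxConn/beamWidth values.
Codec codec =
    new Lucene101Codec() {
      @Override
      public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
        return new Lucene99HnswVectorsFormat(16, 100, /* bypassTinySegments= */ true);
      }
    };
IndexWriterConfig iwc = new IndexWriterConfig().setCodec(codec);
```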
Lucene99HnswVectorsReader.java
@@ -76,11 +76,14 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
private final FieldInfos fieldInfos;
private final IntObjectHashMap<FieldEntry> fields;
private final IndexInput vectorIndex;
private final boolean bypassTinySegments;

public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatVectorsReader)
public Lucene99HnswVectorsReader(
SegmentReadState state, FlatVectorsReader flatVectorsReader, boolean bypassTinySegments)
throws IOException {
this.fields = new IntObjectHashMap<>();
this.flatVectorsReader = flatVectorsReader;
this.bypassTinySegments = bypassTinySegments;
this.fieldInfos = state.fieldInfos;
String metaFileName =
IndexFileNames.segmentFileName(
@@ -122,12 +125,18 @@ public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatV
}
}

public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatVectorsReader)
throws IOException {
this(state, flatVectorsReader, false);
}

private Lucene99HnswVectorsReader(
Lucene99HnswVectorsReader reader, FlatVectorsReader flatVectorsReader) {
this.flatVectorsReader = flatVectorsReader;
this.fieldInfos = reader.fieldInfos;
this.fields = reader.fields;
this.vectorIndex = reader.vectorIndex;
this.bypassTinySegments = reader.bypassTinySegments;
}

@Override
@@ -326,16 +335,19 @@ private void search(
final KnnCollector collector =
new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc);
final Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs);
HnswGraph graph = getGraph(fieldEntry);
boolean doHnsw = knnCollector.k() < scorer.maxOrd();
boolean doHnsw =
knnCollector.k() < scorer.maxOrd()
&& (bypassTinySegments == false
|| fieldEntry.size() > Lucene99HnswVectorsFormat.HNSW_GRAPH_THRESHOLD);
Review comment (Member) on lines +338 to +341:
The reader should just look to see if there is a graph.

// Take into account if quantized? E.g. some scorer cost?
int filteredDocCount = 0;
// The approximate number of vectors that would be visited if we did not filter
int unfilteredVisit = HnswGraphSearcher.expectedVisitedNodes(knnCollector.k(), graph.size());
int unfilteredVisit =
HnswGraphSearcher.expectedVisitedNodes(knnCollector.k(), fieldEntry.size());
if (acceptDocs instanceof BitSet bitSet) {
// Use approximate cardinality as this is good enough, but ensure we don't exceed the graph
// size as that is illogical
filteredDocCount = Math.min(bitSet.approximateCardinality(), graph.size());
filteredDocCount = Math.min(bitSet.approximateCardinality(), fieldEntry.size());
if (unfilteredVisit >= filteredDocCount) {
doHnsw = false;
}
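When doHnsw ends up false, the search falls back to exact scoring over the flat vectors. Roughly (a sketch of the idea only, not the reader's actual fallback path; scorer, knnCollector, and acceptedOrds are the names from the diff above):

```java
// Sketch: brute-force fallback when the graph is skipped. Score every accepted
// ordinal against the query and offer it to the top-k collector.
for (int ord = 0; ord < scorer.maxOrd(); ord++) {
  if (acceptedOrds == null || acceptedOrds.get(ord)) {
    knnCollector.collect(scorer.ordToDoc(ord), scorer.score(ord));
  }
}
```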