Bypass HNSW graph building for tiny segments #14963
In `Lucene99HnswVectorsFormat`:

```diff
@@ -115,6 +115,13 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat {
   /** Default to use single thread merge */
   public static final int DEFAULT_NUM_MERGE_WORKER = 1;
 
+  /**
+   * Threshold below which HNSW graph building is bypassed for tiny segments. Segments with fewer
+   * vectors will use flat storage only, improving indexing performance for workloads with
+   * frequent flushes.
+   */
+  public static final int HNSW_GRAPH_THRESHOLD = 10_000;
+
   static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16;
 
   /**
```
```diff
@@ -137,9 +144,16 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat {
   private final int numMergeWorkers;
   private final TaskExecutor mergeExec;
 
+  /**
+   * Whether to bypass HNSW graph building for tiny segments (below {@link #HNSW_GRAPH_THRESHOLD}).
+   * When enabled, segments with fewer than the threshold number of vectors will store only flat
+   * vectors, significantly improving indexing performance for workloads with frequent flushes.
+   */
+  private final boolean bypassTinySegments;
+
   /** Constructs a format using default graph construction parameters */
   public Lucene99HnswVectorsFormat() {
-    this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, null);
+    this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, null, false);
   }
 
   /**
```

Review comments on the new `bypassTinySegments` field:

> If we allow this to be a parameter, it should be a threshold that refers to the typical

> Makes sense

> I do wonder if we would want to expose this as a parameter though? Maybe it should just be a fixed value? I would have thought about setting it based on a threshold where exhaustive search is no-or-only-slightly more expensive than HNSW search? I would expect this to be related to the M of the graph maybe?

> I'd like to have it at least as a parameter to a pkg-protected constructor, so that we can pass random values in

> It may be good to be able to pass random values in RandomCodec as well to help with the test coverage of the approximate case (we have very few tests that index more than 10k vectors).
```diff
@@ -149,11 +163,22 @@ public Lucene99HnswVectorsFormat() {
    * @param beamWidth the size of the queue maintained during graph construction.
    */
   public Lucene99HnswVectorsFormat(int maxConn, int beamWidth) {
-    this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null);
+    this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null, false);
   }
 
   /**
-   * Constructs a format using the given graph construction parameters and scalar quantization.
+   * Constructs a format using the given graph construction parameters.
+   *
+   * @param maxConn the maximum number of connections to a node in the HNSW graph
+   * @param beamWidth the size of the queue maintained during graph construction.
+   * @param bypassTinySegments whether to bypass HNSW graph building for tiny segments
+   */
+  public Lucene99HnswVectorsFormat(int maxConn, int beamWidth, boolean bypassTinySegments) {
+    this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null, bypassTinySegments);
+  }
+
+  /**
+   * Constructs a format using the given graph construction parameters.
    *
    * @param maxConn the maximum number of connections to a node in the HNSW graph
    * @param beamWidth the size of the queue maintained during graph construction.
```
```diff
@@ -165,6 +190,29 @@ public Lucene99HnswVectorsFormat(int maxConn, int beamWidth) {
    */
   public Lucene99HnswVectorsFormat(
       int maxConn, int beamWidth, int numMergeWorkers, ExecutorService mergeExec) {
+    this(maxConn, beamWidth, numMergeWorkers, mergeExec, false);
+  }
+
+  /**
+   * Constructs a format using the given graph construction parameters.
+   *
+   * @param maxConn the maximum number of connections to a node in the HNSW graph
+   * @param beamWidth the size of the queue maintained during graph construction.
+   * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
+   *     larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
+   * @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
+   *     generated by this format to do the merge. If null, the configured {@link
+   *     MergeScheduler#getIntraMergeExecutor(MergePolicy.OneMerge)} is used.
+   * @param bypassTinySegments whether to bypass HNSW graph building for tiny segments (below
+   *     {@link #HNSW_GRAPH_THRESHOLD} vectors). When enabled, improves indexing performance for
+   *     workloads with frequent flushes.
+   */
+  public Lucene99HnswVectorsFormat(
+      int maxConn,
+      int beamWidth,
+      int numMergeWorkers,
+      ExecutorService mergeExec,
+      boolean bypassTinySegments) {
     super("Lucene99HnswVectorsFormat");
     if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
       throw new IllegalArgumentException(
```

```diff
@@ -182,6 +230,7 @@ public Lucene99HnswVectorsFormat(
     }
     this.maxConn = maxConn;
     this.beamWidth = beamWidth;
+    this.bypassTinySegments = bypassTinySegments;
     if (numMergeWorkers == 1 && mergeExec != null) {
       throw new IllegalArgumentException(
           "No executor service is needed as we'll use single thread to merge");
```
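As a standalone illustration of the argument validation visible in the constructor diff above (the class name is hypothetical and the `MAXIMUM_MAX_CONN` value is my own assumption for the sketch; this diff only references the constant), the checks can be sketched as:

```java
// Sketch of the constructor's argument checks: maxConn must be positive and
// bounded, and a single-threaded merge must not be handed an executor.
public class FormatArgChecks {
  // Assumed bound for illustration; the diff shows only the name MAXIMUM_MAX_CONN.
  static final int MAXIMUM_MAX_CONN = 512;

  public static void check(int maxConn, int numMergeWorkers, Object mergeExec) {
    if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
      throw new IllegalArgumentException(
          "maxConn must be in (0, " + MAXIMUM_MAX_CONN + "]: " + maxConn);
    }
    if (numMergeWorkers == 1 && mergeExec != null) {
      throw new IllegalArgumentException(
          "No executor service is needed as we'll use single thread to merge");
    }
  }

  public static void main(String[] args) {
    check(16, 1, null); // default-style arguments: no exception
    System.out.println("checks passed");
  }
}
```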
```diff
@@ -202,12 +251,14 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException
         beamWidth,
         flatVectorsFormat.fieldsWriter(state),
         numMergeWorkers,
-        mergeExec);
+        mergeExec,
+        bypassTinySegments);
   }
 
   @Override
   public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
-    return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state));
+    return new Lucene99HnswVectorsReader(
+        state, flatVectorsFormat.fieldsReader(state), bypassTinySegments);
   }
 
   @Override
```
```diff
@@ -221,6 +272,8 @@ public String toString() {
         + maxConn
         + ", beamWidth="
         + beamWidth
+        + ", bypassTinySegments="
+        + bypassTinySegments
         + ", flatVectorFormat="
         + flatVectorsFormat
         + ")";
```
In `Lucene99HnswVectorsReader`:
```diff
@@ -76,11 +76,14 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
   private final FieldInfos fieldInfos;
   private final IntObjectHashMap<FieldEntry> fields;
   private final IndexInput vectorIndex;
+  private final boolean bypassTinySegments;
 
-  public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatVectorsReader)
+  public Lucene99HnswVectorsReader(
+      SegmentReadState state, FlatVectorsReader flatVectorsReader, boolean bypassTinySegments)
       throws IOException {
     this.fields = new IntObjectHashMap<>();
     this.flatVectorsReader = flatVectorsReader;
+    this.bypassTinySegments = bypassTinySegments;
     this.fieldInfos = state.fieldInfos;
     String metaFileName =
         IndexFileNames.segmentFileName(
```

```diff
@@ -122,12 +125,18 @@ public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatV
     }
   }
 
+  public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatVectorsReader)
+      throws IOException {
+    this(state, flatVectorsReader, false);
+  }
+
   private Lucene99HnswVectorsReader(
       Lucene99HnswVectorsReader reader, FlatVectorsReader flatVectorsReader) {
     this.flatVectorsReader = flatVectorsReader;
     this.fieldInfos = reader.fieldInfos;
     this.fields = reader.fields;
     this.vectorIndex = reader.vectorIndex;
+    this.bypassTinySegments = reader.bypassTinySegments;
   }
 
   @Override
```
```diff
@@ -326,16 +335,19 @@ private void search(
     final KnnCollector collector =
         new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc);
     final Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs);
-    HnswGraph graph = getGraph(fieldEntry);
-    boolean doHnsw = knnCollector.k() < scorer.maxOrd();
+    boolean doHnsw =
+        knnCollector.k() < scorer.maxOrd()
+            && (bypassTinySegments == false
+                || fieldEntry.size() > Lucene99HnswVectorsFormat.HNSW_GRAPH_THRESHOLD);
 
     // Take into account if quantized? E.g. some scorer cost?
     int filteredDocCount = 0;
     // The approximate number of vectors that would be visited if we did not filter
-    int unfilteredVisit = HnswGraphSearcher.expectedVisitedNodes(knnCollector.k(), graph.size());
+    int unfilteredVisit =
+        HnswGraphSearcher.expectedVisitedNodes(knnCollector.k(), fieldEntry.size());
     if (acceptDocs instanceof BitSet bitSet) {
       // Use approximate cardinality as this is good enough, but ensure we don't exceed the graph
       // size as that is illogical
-      filteredDocCount = Math.min(bitSet.approximateCardinality(), fieldEntry.size());
+      filteredDocCount = Math.min(bitSet.approximateCardinality(), fieldEntry.size());
       if (unfilteredVisit >= filteredDocCount) {
         doHnsw = false;
       }
```

Review comment on lines +338 to +341 (the new `doHnsw` condition):

> The reader should just look to see if there is a graph.
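To make the new gating condition concrete, here is a standalone sketch (my own simplification with hypothetical names; the real code consults the scorer and per-field metadata) of when approximate HNSW search is attempted under this change:

```java
// Approximate search runs only if k is below the number of candidate vectors,
// and, when tiny-segment bypass is enabled, only if the segment is large
// enough that a graph was actually built (size above the threshold).
public class DoHnswSketch {
  static final int HNSW_GRAPH_THRESHOLD = 10_000; // value proposed in this PR

  public static boolean doHnsw(int k, int maxOrd, boolean bypassTinySegments, int segmentSize) {
    return k < maxOrd
        && (bypassTinySegments == false || segmentSize > HNSW_GRAPH_THRESHOLD);
  }

  public static void main(String[] args) {
    // A 500-vector segment with bypass enabled falls back to exhaustive search.
    System.out.println(doHnsw(10, 500, true, 500));
  }
}
```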
Review comment on the value of `HNSW_GRAPH_THRESHOLD`:

> I think that the comment should try to expand a bit more on this value to help future readers think through whether it's still right or whether it should be updated. One thing we discussed on the linked issue is that the number of visited nodes is on the order of `log(size) * k`. So having a graph only helps if `log(size) * k << size`, i.e. `size / log(size) >> k`. If we arbitrarily choose k = 100, 10,000 is the first power of 10 such that `size / log(size)` is one order of magnitude greater than k (10/log(10) ~= 4.3, 100/log(100) ~= 22, 1000/log(1000) ~= 144, 10000/log(10000) ~= 1085).
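The reviewer's arithmetic can be reproduced with a short sketch (my own code, not from the PR; natural log, which the quoted numbers imply):

```java
// Computes size / ln(size), the quantity the review comment compares against k:
// HNSW visits roughly k * ln(size) nodes while a flat scan visits all `size`,
// so the graph only pays off once size / ln(size) is well above k.
public class ThresholdMath {
  public static double ratio(int size) {
    return size / Math.log(size);
  }

  public static void main(String[] args) {
    for (int size = 10; size <= 10_000; size *= 10) {
      System.out.printf("size=%d  size/ln(size)=%.1f%n", size, ratio(size));
    }
  }
}
```

Running this yields ratios of roughly 4.3, 21.7, 144.8, and 1085.7, matching the values quoted in the review comment.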