diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 5084c25f356..c2b37a94f89 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -61,6 +61,10 @@ Improvements * GITHUB#14113: Remove unnecessary ByteArrayDataInput allocations from `Lucene90DocValuesProducer$TermsDict.decompressBlock`. (Ankit Jain) +* GITHUB#14138: Implement IntersectVisitor#visit(IntsRef) in many of the current implementations and add + BulkAdder#add(IntsRef) method. They should provide better performance due to fewer virtual method calls and + more efficient bulk processing. (Ignacio Vera) + Optimizations --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java index 7f5f8cf6290..9cebb8e7301 100644 --- a/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java @@ -44,6 +44,7 @@ import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.NumericUtils; /** Distance query for {@link LatLonPoint}. 
*/ @@ -233,6 +234,11 @@ public void visit(int docID) { adder.add(docID); } + @Override + public void visit(IntsRef ref) { + adder.add(ref); + } + @Override public void visit(DocIdSetIterator iterator) throws IOException { adder.add(iterator); @@ -269,6 +275,14 @@ public void visit(int docID) { cost[0]--; } + @Override + public void visit(IntsRef ref) { + for (int i = 0; i < ref.length; i++) { + result.clear(ref.ints[ref.offset + i]); + } + cost[0] -= ref.length; + } + @Override public void visit(DocIdSetIterator iterator) throws IOException { result.andNot(iterator); diff --git a/lucene/core/src/java/org/apache/lucene/document/LongDistanceFeatureQuery.java b/lucene/core/src/java/org/apache/lucene/document/LongDistanceFeatureQuery.java index c675136ca80..788ded4909b 100644 --- a/lucene/core/src/java/org/apache/lucene/document/LongDistanceFeatureQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/LongDistanceFeatureQuery.java @@ -35,6 +35,7 @@ import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.Weight; import org.apache.lucene.util.DocIdSetBuilder; +import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.NumericUtils; final class LongDistanceFeatureQuery extends Query { @@ -405,6 +406,21 @@ public void visit(int docID, byte[] packedValue) { adder.add(docID); } + @Override + public void visit(DocIdSetIterator iterator) throws IOException { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); + } + } + + @Override + public void visit(IntsRef ref) { + for (int i = 0; i < ref.length; ++i) { + visit(ref.ints[ref.offset + i]); + } + } + @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { long minDocValue = NumericUtils.sortableBytesToLong(minPackedValue, 0); diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java index f5747c0f8bd..8248441f3cd 
100644 --- a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java @@ -38,6 +38,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil.ByteArrayComparator; import org.apache.lucene.util.DocIdSetBuilder; +import org.apache.lucene.util.IntsRef; /** * Query class for searching {@code RangeField} types by a defined {@link Relation}. @@ -401,7 +402,12 @@ public void grow(int count) { } @Override - public void visit(int docID) throws IOException { + public void visit(IntsRef ref) { + adder.add(ref); + } + + @Override + public void visit(int docID) { adder.add(docID); } @@ -411,7 +417,7 @@ public void visit(DocIdSetIterator iterator) throws IOException { } @Override - public void visit(int docID, byte[] leaf) throws IOException { + public void visit(int docID, byte[] leaf) { if (queryType.matches(ranges, leaf, numDims, bytesPerDim, comparator)) { visit(docID); } diff --git a/lucene/core/src/java/org/apache/lucene/document/SpatialQuery.java b/lucene/core/src/java/org/apache/lucene/document/SpatialQuery.java index 811591d9a1c..cc233f89948 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SpatialQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SpatialQuery.java @@ -49,6 +49,7 @@ import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IntsRef; /** * Base query class for all spatial geometries: {@link LatLonShape}, {@link LatLonPoint} and {@link @@ -445,6 +446,11 @@ public void visit(DocIdSetIterator iterator) throws IOException { adder.add(iterator); } + @Override + public void visit(IntsRef ref) { + adder.add(ref); + } + @Override public void visit(int docID, byte[] t) { if (leafPredicate.test(t)) { @@ -489,6 +495,14 @@ public void visit(DocIdSetIterator iterator) throws IOException { cost[0] += iterator.cost(); } + 
@Override + public void visit(IntsRef ref) { + for (int i = 0; i < ref.length; i++) { + result.set(ref.ints[ref.offset + i]); + } + cost[0] += ref.length; + } + @Override public void visit(int docID, byte[] t) { if (result.get(docID) == false) { @@ -532,6 +546,14 @@ public void visit(int docID) { cost[0]++; } + @Override + public void visit(IntsRef ref) { + for (int i = 0; i < ref.length; i++) { + result.set(ref.ints[ref.offset + i]); + } + cost[0] += ref.length; + } + @Override public void visit(DocIdSetIterator iterator) throws IOException { result.or(iterator); @@ -589,6 +611,13 @@ public void visit(DocIdSetIterator iterator) throws IOException { excluded.or(iterator); } + @Override + public void visit(IntsRef ref) { + for (int i = 0; i < ref.length; i++) { + visit(ref.ints[ref.offset + i]); + } + } + @Override public void visit(int docID, byte[] t) { if (excluded.get(docID) == false) { @@ -643,6 +672,14 @@ public void visit(int docID) { cost[0]--; } + @Override + public void visit(IntsRef ref) { + for (int i = 0; i < ref.length; i++) { + result.clear(ref.ints[ref.offset + i]); + } + cost[0] -= ref.length; + } + @Override public void visit(DocIdSetIterator iterator) throws IOException { result.andNot(iterator); @@ -693,6 +730,13 @@ public void visit(DocIdSetIterator iterator) throws IOException { result.andNot(iterator); } + @Override + public void visit(IntsRef ref) { + for (int i = 0; i < ref.length; i++) { + visit(ref.ints[ref.offset + i]); + } + } + @Override public void visit(int docID, byte[] packedTriangle) { // NO-OP diff --git a/lucene/core/src/java/org/apache/lucene/document/XYPointInGeometryQuery.java b/lucene/core/src/java/org/apache/lucene/document/XYPointInGeometryQuery.java index 47b6abb46c2..833d9c9209c 100644 --- a/lucene/core/src/java/org/apache/lucene/document/XYPointInGeometryQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/XYPointInGeometryQuery.java @@ -38,6 +38,7 @@ import org.apache.lucene.search.ScorerSupplier; import 
org.apache.lucene.search.Weight; import org.apache.lucene.util.DocIdSetBuilder; +import org.apache.lucene.util.IntsRef; /** * Finds all previously indexed points that fall within the specified XY geometries. @@ -90,6 +91,11 @@ public void visit(DocIdSetIterator iterator) throws IOException { adder.add(iterator); } + @Override + public void visit(IntsRef ref) { + adder.add(ref); + } + @Override public void visit(int docID, byte[] packedValue) { double x = XYEncodingUtils.decode(packedValue, 0); diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 1b6d6869c19..e5d956e8d1e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -188,9 +188,7 @@ public void visit(DocIdSetIterator iterator) throws IOException { @Override public void visit(IntsRef ref) { - for (int i = ref.offset; i < ref.offset + ref.length; i++) { - adder.add(ref.ints[i]); - } + adder.add(ref); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java index 28128af05f6..159cef02567 100644 --- a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java @@ -41,29 +41,28 @@ public final class DocIdSetBuilder { * * @see DocIdSetBuilder#grow */ - public abstract static class BulkAdder { - public abstract void add(int doc); + public sealed interface BulkAdder permits FixedBitSetAdder, BufferAdder { + void add(int doc); - public void add(DocIdSetIterator iterator) throws IOException { - int docID; - while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - add(docID); - } - } - } + void add(IntsRef docs); - private static class FixedBitSetAdder extends BulkAdder { - final FixedBitSet bitSet; + void add(DocIdSetIterator 
iterator) throws IOException; + } - FixedBitSetAdder(FixedBitSet bitSet) { - this.bitSet = bitSet; - } + private record FixedBitSetAdder(FixedBitSet bitSet) implements BulkAdder { @Override public void add(int doc) { bitSet.set(doc); } + @Override + public void add(IntsRef docs) { + for (int i = 0; i < docs.length; i++) { + bitSet.set(docs.ints[docs.offset + i]); + } + } + @Override public void add(DocIdSetIterator iterator) throws IOException { bitSet.or(iterator); @@ -85,17 +84,26 @@ private static class Buffer { } } - private static class BufferAdder extends BulkAdder { - final Buffer buffer; - - BufferAdder(Buffer buffer) { - this.buffer = buffer; - } + private record BufferAdder(Buffer buffer) implements BulkAdder { @Override public void add(int doc) { buffer.array[buffer.length++] = doc; } + + @Override + public void add(IntsRef docs) { + System.arraycopy(docs.ints, docs.offset, buffer.array, buffer.length, docs.length); + buffer.length += docs.length; + } + + @Override + public void add(DocIdSetIterator iterator) throws IOException { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + add(docID); + } + } } private final int maxDoc; diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index 0efcc2ef465..a90c79a8c80 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -24,6 +24,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.MathUtil; /** @@ -146,6 +147,19 @@ public void visit(int docID) { count[0]++; } + @Override + public void visit(DocIdSetIterator iterator) throws IOException { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); + } + } + + 
@Override + public void visit(IntsRef ref) { + count[0] += ref.length; + } + @Override public void visit(int docID, byte[] packedValue) { throw new AssertionError(); diff --git a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java index 88dbf24e2d1..1d9079a203f 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java @@ -130,13 +130,20 @@ public void testRandom() throws IOException { for (j = 0; j < array.length; ) { final int l = TestUtil.nextInt(random(), 1, array.length - j); DocIdSetBuilder.BulkAdder adder = null; - for (int k = 0, budget = 0; k < l; ++k) { - if (budget == 0 || rarely()) { - budget = TestUtil.nextInt(random(), 1, l - k + 5); - adder = builder.grow(budget); + if (usually()) { + for (int k = 0, budget = 0; k < l; ++k) { + if (budget == 0 || rarely()) { + budget = TestUtil.nextInt(random(), 1, l - k + 5); + adder = builder.grow(budget); + } + adder.add(array[j++]); + budget--; } - adder.add(array[j++]); - budget--; + } else { + IntsRef intsRef = new IntsRef(array, j, l); + adder = builder.grow(l); + adder.add(intsRef); + j += l; } } diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInShapeIntersectVisitor.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInShapeIntersectVisitor.java index 8883fef2240..9b990b08e47 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInShapeIntersectVisitor.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInShapeIntersectVisitor.java @@ -27,6 +27,7 @@ import org.apache.lucene.spatial3d.geom.PlanetModel.DocValueEncoder; import org.apache.lucene.spatial3d.geom.XYZBounds; import org.apache.lucene.util.DocIdSetBuilder; +import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.NumericUtils; class PointInShapeIntersectVisitor implements 
IntersectVisitor { @@ -67,6 +68,11 @@ public void visit(DocIdSetIterator iterator) throws IOException { adder.add(iterator); } + @Override + public void visit(IntsRef ref) { + adder.add(ref); + } + @Override public void visit(int docID, byte[] packedValue) { assert packedValue.length == 12;