Skip to content

Commit 722c929

Browse files
committed
Remove scoreAll() optimization from DefaultBulkScorer. (#14039)
I cannot see benefits from this optimization anymore when running luceneutil. However, I do see some benefits from specializing cases when the collector produces a competitive iterator or when the scorer produces a two-phase iterator.
1 parent 0c79dc7 commit 722c929

File tree

1 file changed

+86
-82
lines changed
  • lucene/core/src/java/org/apache/lucene/search

1 file changed

+86
-82
lines changed

Diff for: lucene/core/src/java/org/apache/lucene/search/Weight.java

+86-82
Original file line numberDiff line numberDiff line change
@@ -234,12 +234,13 @@ protected static class DefaultBulkScorer extends BulkScorer {
234234

235235
/** Sole constructor. */
236236
public DefaultBulkScorer(Scorer scorer) {
237-
if (scorer == null) {
238-
throw new NullPointerException();
239-
}
240-
this.scorer = scorer;
241-
this.iterator = scorer.iterator();
237+
this.scorer = Objects.requireNonNull(scorer);
242238
this.twoPhase = scorer.twoPhaseIterator();
239+
if (twoPhase == null) {
240+
this.iterator = scorer.iterator();
241+
} else {
242+
this.iterator = twoPhase.approximation();
243+
}
243244
}
244245

245246
@Override
@@ -251,36 +252,8 @@ public long cost() {
251252
public int score(LeafCollector collector, Bits acceptDocs, int min, int max)
252253
throws IOException {
253254
collector.setScorer(scorer);
254-
DocIdSetIterator scorerIterator = twoPhase == null ? iterator : twoPhase.approximation();
255255
DocIdSetIterator competitiveIterator = collector.competitiveIterator();
256256

257-
if (competitiveIterator == null
258-
&& scorerIterator.docID() == -1
259-
&& min == 0
260-
&& max == DocIdSetIterator.NO_MORE_DOCS) {
261-
scoreAll(collector, scorerIterator, twoPhase, acceptDocs);
262-
return DocIdSetIterator.NO_MORE_DOCS;
263-
} else {
264-
return scoreRange(
265-
collector, scorerIterator, twoPhase, competitiveIterator, acceptDocs, min, max);
266-
}
267-
}
268-
269-
/**
270-
* Specialized method to bulk-score a range of hits; we separate this from {@link #scoreAll} to
271-
* help out hotspot. See <a
272-
* href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a>
273-
*/
274-
static int scoreRange(
275-
LeafCollector collector,
276-
DocIdSetIterator iterator,
277-
TwoPhaseIterator twoPhase,
278-
DocIdSetIterator competitiveIterator,
279-
Bits acceptDocs,
280-
int min,
281-
int max)
282-
throws IOException {
283-
284257
if (competitiveIterator != null) {
285258
if (competitiveIterator.docID() > min) {
286259
min = competitiveIterator.docID();
@@ -289,75 +262,106 @@ static int scoreRange(
289262
}
290263
}
291264

292-
int doc = iterator.docID();
293-
if (doc < min) {
294-
if (doc == min - 1) {
295-
doc = iterator.nextDoc();
265+
if (iterator.docID() < min) {
266+
if (iterator.docID() == min - 1) {
267+
iterator.nextDoc();
296268
} else {
297-
doc = iterator.advance(min);
269+
iterator.advance(min);
298270
}
299271
}
300272

273+
// These various specializations help save some null checks in a hot loop, but as importantly
274+
// if not more importantly, they help reduce the polymorphism of calls sites to nextDoc() and
275+
// collect() because only a subset of collectors produce a competitive iterator, and the set
276+
// of implementing classes for two-phase approximations is smaller than the set of doc id set
277+
// iterator implementations.
301278
if (twoPhase == null && competitiveIterator == null) {
302279
// Optimize simple iterators with collectors that can't skip
303-
while (doc < max) {
304-
if (acceptDocs == null || acceptDocs.get(doc)) {
305-
collector.collect(doc);
306-
}
307-
doc = iterator.nextDoc();
308-
}
280+
scoreIterator(collector, acceptDocs, iterator, max);
281+
} else if (competitiveIterator == null) {
282+
scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max);
283+
} else if (twoPhase == null) {
284+
scoreCompetitiveIterator(collector, acceptDocs, iterator, competitiveIterator, max);
309285
} else {
310-
while (doc < max) {
311-
if (competitiveIterator != null) {
312-
assert competitiveIterator.docID() <= doc;
313-
if (competitiveIterator.docID() < doc) {
314-
competitiveIterator.advance(doc);
315-
}
316-
if (competitiveIterator.docID() != doc) {
317-
doc = iterator.advance(competitiveIterator.docID());
318-
continue;
319-
}
320-
}
286+
scoreTwoPhaseOrCompetitiveIterator(
287+
collector, acceptDocs, iterator, twoPhase, competitiveIterator, max);
288+
}
321289

322-
if ((acceptDocs == null || acceptDocs.get(doc))
323-
&& (twoPhase == null || twoPhase.matches())) {
324-
collector.collect(doc);
325-
}
326-
doc = iterator.nextDoc();
290+
return iterator.docID();
291+
}
292+
293+
private static void scoreIterator(
294+
LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max)
295+
throws IOException {
296+
for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) {
297+
if (acceptDocs == null || acceptDocs.get(doc)) {
298+
collector.collect(doc);
327299
}
328300
}
329-
330-
return doc;
331301
}
332302

333-
/**
334-
* Specialized method to bulk-score all hits; we separate this from {@link #scoreRange} to help
335-
* out hotspot. See <a href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a>
336-
*/
337-
static void scoreAll(
303+
private static void scoreTwoPhaseIterator(
338304
LeafCollector collector,
305+
Bits acceptDocs,
339306
DocIdSetIterator iterator,
340307
TwoPhaseIterator twoPhase,
341-
Bits acceptDocs)
308+
int max)
342309
throws IOException {
343-
if (twoPhase == null) {
344-
for (int doc = iterator.nextDoc();
345-
doc != DocIdSetIterator.NO_MORE_DOCS;
346-
doc = iterator.nextDoc()) {
347-
if (acceptDocs == null || acceptDocs.get(doc)) {
348-
collector.collect(doc);
310+
for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) {
311+
if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
312+
collector.collect(doc);
313+
}
314+
}
315+
}
316+
317+
private static void scoreCompetitiveIterator(
318+
LeafCollector collector,
319+
Bits acceptDocs,
320+
DocIdSetIterator iterator,
321+
DocIdSetIterator competitiveIterator,
322+
int max)
323+
throws IOException {
324+
for (int doc = iterator.docID(); doc < max; ) {
325+
assert competitiveIterator.docID() <= doc; // invariant
326+
if (competitiveIterator.docID() < doc) {
327+
int competitiveNext = competitiveIterator.advance(doc);
328+
if (competitiveNext != doc) {
329+
doc = iterator.advance(competitiveNext);
330+
continue;
349331
}
350332
}
351-
} else {
352-
// The scorer has an approximation, so run the approximation first, then check acceptDocs,
353-
// then confirm
354-
for (int doc = iterator.nextDoc();
355-
doc != DocIdSetIterator.NO_MORE_DOCS;
356-
doc = iterator.nextDoc()) {
357-
if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
358-
collector.collect(doc);
333+
334+
if ((acceptDocs == null || acceptDocs.get(doc))) {
335+
collector.collect(doc);
336+
}
337+
338+
doc = iterator.nextDoc();
339+
}
340+
}
341+
342+
private static void scoreTwoPhaseOrCompetitiveIterator(
343+
LeafCollector collector,
344+
Bits acceptDocs,
345+
DocIdSetIterator iterator,
346+
TwoPhaseIterator twoPhase,
347+
DocIdSetIterator competitiveIterator,
348+
int max)
349+
throws IOException {
350+
for (int doc = iterator.docID(); doc < max; ) {
351+
assert competitiveIterator.docID() <= doc; // invariant
352+
if (competitiveIterator.docID() < doc) {
353+
int competitiveNext = competitiveIterator.advance(doc);
354+
if (competitiveNext != doc) {
355+
doc = iterator.advance(competitiveNext);
356+
continue;
359357
}
360358
}
359+
360+
if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
361+
collector.collect(doc);
362+
}
363+
364+
doc = iterator.nextDoc();
361365
}
362366
}
363367
}

0 commit comments

Comments
 (0)