Skip to content

Commit 883afea

Browse files
Optimize ForUtil.expand8 using the JDK Vector API
1 parent b9288ae commit 883afea

File tree

8 files changed: 50 additions and 16 deletions.

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ Optimizations
150150
* GITHUB#15160: Increased the size used for blocks of postings from 128 to 256.
151151
This gives a noticeable speedup to many queries. (Adrien Grand)
152152

153+
* GITHUB#15198: Optimize ForUtil.expand8 using the JDK Vector API. (Ramakrishna Chilaka)
154+
153155
* GITHUB#14863: Perform scoring for 4, 7, 8 bit quantized vectors off-heap. (Kaival Parikh)
154156

155157
Bug Fixes
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
2-
"lucene/core/src/java/org/apache/lucene/codecs/lucene104/ForUtil.java": "5dda079c68e6060217f29010618c7fd807583056",
3-
"lucene/core/src/java/org/apache/lucene/codecs/lucene104/gen_ForUtil.py": "4692fed62d9f79554647c5423b96b9e60c9f30eb"
2+
"lucene/core/src/java/org/apache/lucene/codecs/lucene104/ForUtil.java": "bf1168dbc05311c2e49b652391e01cb01d3f9133",
3+
"lucene/core/src/java/org/apache/lucene/codecs/lucene104/gen_ForUtil.py": "e87e420e633601f6f751b6777d7c094ebc66c3e7"
44
}

lucene/core/src/java/org/apache/lucene/codecs/lucene104/ForUtil.java

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.io.IOException;
2222
import org.apache.lucene.internal.vectorization.PostingDecodingUtil;
2323
import org.apache.lucene.store.DataOutput;
24+
import org.apache.lucene.util.VectorUtil;
2425

2526
/**
2627
* Inspired from https://fulmicoton.com/posts/bitpacking/ Encodes multiple integers in one to get
@@ -55,13 +56,7 @@ static int mask8(int bitsPerValue) {
5556
}
5657

5758
static void expand8(int[] arr) {
58-
for (int i = 0; i < 64; ++i) {
59-
int l = arr[i];
60-
arr[i] = (l >>> 24) & 0xFF;
61-
arr[64 + i] = (l >>> 16) & 0xFF;
62-
arr[128 + i] = (l >>> 8) & 0xFF;
63-
arr[192 + i] = l & 0xFF;
64-
}
59+
VectorUtil.expand8(arr);
6560
}
6661

6762
static void collapse8(int[] arr) {

lucene/core/src/java/org/apache/lucene/codecs/lucene104/gen_ForUtil.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import java.io.IOException;
4646
import org.apache.lucene.internal.vectorization.PostingDecodingUtil;
4747
import org.apache.lucene.store.DataOutput;
48+
import org.apache.lucene.util.VectorUtil;
4849
4950
/**
5051
* Inspired from https://fulmicoton.com/posts/bitpacking/
@@ -80,13 +81,7 @@
8081
}
8182
8283
static void expand8(int[] arr) {
83-
for (int i = 0; i < 64; ++i) {
84-
int l = arr[i];
85-
arr[i] = (l >>> 24) & 0xFF;
86-
arr[64 + i] = (l >>> 16) & 0xFF;
87-
arr[128 + i] = (l >>> 8) & 0xFF;
88-
arr[192 + i] = l & 0xFF;
89-
}
84+
VectorUtil.expand8(arr);
9085
}
9186
9287
static void collapse8(int[] arr) {

lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorUtilSupport.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,4 +415,16 @@ public float[] l2normalize(float[] v, boolean throwOnZero) {
415415
}
416416
return v;
417417
}
418+
419+
/**
 * Scalar implementation that unpacks, in place, the four bytes of each of the first 64 ints of
 * {@code arr}.
 *
 * <p>For every {@code i} in {@code [0, 64)}, the original value of {@code arr[i]} is split so that
 * its most significant byte ends up in {@code arr[i]}, the next byte in {@code arr[64 + i]}, the
 * next in {@code arr[128 + i]} and the least significant byte in {@code arr[192 + i]}.
 *
 * @param arr buffer holding 64 packed ints at indices 0..63; indices 0..255 are written, so it
 *     must have at least 256 elements
 */
@Override
420+
public void expand8(int[] arr) {
421+
// BLOCK_SIZE is 256
// 64 packed ints, four bytes each, expand to 4 * 64 = 256 values.
422+
for (int i = 0; i < 64; ++i) {
423+
// Read the packed int first: arr[i] itself is overwritten on the next line.
int l = arr[i];
424+
arr[i] = (l >>> 24) & 0xFF;
425+
arr[64 + i] = (l >>> 16) & 0xFF;
426+
arr[128 + i] = (l >>> 8) & 0xFF;
427+
arr[192 + i] = l & 0xFF;
428+
}
429+
}
418430
}

lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorUtilSupport.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,4 +144,6 @@ float recalculateScalarQuantizationOffset(
144144
int filterByScore(int[] docBuffer, double[] scoreBuffer, double minScoreInclusive, int upTo);
145145

146146
float[] l2normalize(float[] v, boolean throwOnZero);
147+
148+
/**
 * Expands the bytes packed in {@code arr}, in place.
 *
 * <p>As implemented by {@code DefaultVectorUtilSupport}: for each {@code i} in {@code [0, 64)},
 * the four bytes of the original {@code arr[i]} are written, most significant first, to {@code
 * arr[i]}, {@code arr[64 + i]}, {@code arr[128 + i]} and {@code arr[192 + i]}.
 *
 * @param arr buffer to expand; implementations write indices 0..255
 */
void expand8(int[] arr);
147149
}

lucene/core/src/java/org/apache/lucene/util/VectorUtil.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,4 +484,8 @@ public static int filterByScore(
484484
}
485485
return IMPL.filterByScore(docBuffer, scoreBuffer, minScoreInclusive, upTo);
486486
}
487+
488+
/**
 * Expands the bytes packed in {@code arr}, in place, by delegating to {@code IMPL.expand8(arr)}.
 *
 * @param arr buffer to expand; it is modified directly and nothing is returned
 */
public static void expand8(int[] arr) {
489+
IMPL.expand8(arr);
490+
}
487491
}

lucene/core/src/java24/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,4 +1409,28 @@ private void l2normalizeBody(float[] v, float invNorm, int limit) {
14091409
FloatVector.fromArray(FLOAT_SPECIES, v, i).mul(invNormVector).intoArray(v, i);
14101410
}
14111411
}
1412+
1413+
/**
 * Whether {@code expand8} takes its vectorized path: true only when {@code INT_SPECIES} holds at
 * least 8 int lanes. Otherwise {@code expand8} falls back to its scalar loop.
 */
private static final boolean EXPAND_8_VECTOR_OPTIMIZATION = INT_SPECIES.length() >= 8;
1414+
1415+
/**
 * Unpacks, in place, the four bytes of each of the first 64 ints of {@code arr}.
 *
 * <p>For every {@code i} in {@code [0, 64)}, the most significant byte of the original {@code
 * arr[i]} goes to {@code arr[i]}, the next bytes to {@code arr[64 + i]} and {@code arr[128 + i]},
 * and the least significant byte to {@code arr[192 + i]}. When {@code
 * EXPAND_8_VECTOR_OPTIMIZATION} is set this is done {@code INT_SPECIES.length()} ints at a time;
 * otherwise a scalar loop computes the same values one int at a time.
 *
 * @param arr buffer holding 64 packed ints at indices 0..63; indices 0..255 are written
 */
@Override
1416+
public void expand8(int[] arr) {
1417+
if (EXPAND_8_VECTOR_OPTIMIZATION) {
1418+
// NOTE(review): assumes INT_SPECIES.length() evenly divides 64, otherwise the last load
// would run past index 63 into already-expanded output — confirm.
for (int i = 0; i < 64; i += INT_SPECIES.length()) {
1419+
// Load the packed ints before any store: the first store below overwrites arr[i..].
IntVector v = IntVector.fromArray(INT_SPECIES, arr, i);
1420+
1421+
// No mask on the top byte: a logical right shift by 24 leaves only 8 bits, which matches
// (l >>> 24) & 0xFF in the scalar branch.
v.lanewise(LSHR, 24).intoArray(arr, i);
1422+
v.lanewise(LSHR, 16).and(0xFF).intoArray(arr, 64 + i);
1423+
v.lanewise(LSHR, 8).and(0xFF).intoArray(arr, 128 + i);
1424+
v.and(0xFF).intoArray(arr, 192 + i);
1425+
}
1426+
} else {
1427+
// Scalar fallback used when INT_SPECIES has fewer than 8 lanes.
for (int i = 0; i < 64; ++i) {
1428+
int l = arr[i];
1429+
arr[i] = (l >>> 24) & 0xFF;
1430+
arr[64 + i] = (l >>> 16) & 0xFF;
1431+
arr[128 + i] = (l >>> 8) & 0xFF;
1432+
arr[192 + i] = l & 0xFF;
1433+
}
1434+
}
1435+
}
14121436
}

0 commit comments

Comments
 (0)