Introduce transposed centroid table to speed up ProductQuantizer::compute_codes() (facebookresearch#2562)

Alexandr Guzhva · facebook-github-bot · commit 771b1a8e3746 · 2022-11-06T08:32:54.000-08:00
Summary: Pull Request resolved: facebookresearch#2562 Introduce a table of transposed centroids in ProductQuantizer that significantly speeds up the ProductQuantizer::compute_codes() call for certain PQ parameters, and therefore speeds up search queries. * the ::sync_transposed_centroids() call is used to fill the table * the ::clear_transposed_centroids() call clears the table, so that the original baseline code is used for ::compute_codes() Reviewed By: mdouze Differential Revision: D40763338 fbshipit-source-id: 87b40e5dd2f8c3cadeb94c1cd9e8a4a5b6ffa97d
diff --git a/benchs/bench_pq_transposed_centroid_table.py b/benchs/bench_pq_transposed_centroid_table.py
@@ -0,0 +1,129 @@
+import faiss
+import time
+import random
+
+import faiss.contrib.datasets
+
+
+# copied from benchs/bench_all_ivf/bench_all_ivf.py
+def unwind_index_ivf(index):
+    if isinstance(index, faiss.IndexPreTransform):
+        assert index.chain.size() == 1
+        vt = index.chain.at(0)
+        index_ivf, vt2 = unwind_index_ivf(faiss.downcast_index(index.index))
+        assert vt2 is None
+        return index_ivf, vt
+    if hasattr(faiss, "IndexRefine") and isinstance(index, faiss.IndexRefine):
+        return unwind_index_ivf(faiss.downcast_index(index.base_index))
+    if isinstance(index, faiss.IndexIVF):
+        return index, None
+    else:
+        return None, None
+
+
+def test_bigann10m(index_file, index_parameters):
+    ds = faiss.contrib.datasets.DatasetBigANN(nb_M=10)
+
+    xq = ds.get_queries()
+    xb = ds.get_database()
+    gt = ds.get_groundtruth()
+
+    nb, d = xb.shape
+    nq, d = xq.shape
+
+    print("Reading index {}".format(index_file))
+    index = faiss.read_index(index_file)
+
+    ps = faiss.ParameterSpace()
+    ps.initialize(index)
+
+    index_ivf, vec_transform = unwind_index_ivf(index)
+
+    print('params                                                                      regular    transp_centroids   regular   R@1    R@10   R@100')
+    for index_parameter in index_parameters:
+        ps.set_index_parameters(index, index_parameter)
+
+        print(index_parameter.ljust(70), end=' ')
+
+        k = 100
+
+        # warmup
+        D, I = index.search(xq, k)
+
+        # warmup
+        D, I = index.search(xq, k)
+
+        # eval: baseline search, transposed centroid table not yet filled
+        t2_0 = time.time()
+        D, I = index.search(xq, k)
+        t2_1 = time.time()
+
+        # eval: same search after filling the transposed centroid table
+        index_ivf.pq.sync_transposed_centroids()
+        t3_0 = time.time()
+        D, I = index.search(xq, k)
+        t3_1 = time.time()
+
+        # eval: same search again after clearing the transposed centroid table
+        index_ivf.pq.clear_transposed_centroids()
+        t4_0 = time.time()
+        D, I = index.search(xq, k)
+        t4_1 = time.time()
+
+        print("   %9.5f  " % (t2_1 - t2_0), end=' ')
+        print("   %9.5f  " % (t3_1 - t3_0), end=' ')
+        print("   %9.5f  " % (t4_1 - t4_0), end=' ')
+
+        for rank in 1, 10, 100:
+            n_ok = (I[:, :rank] == gt[:, :1]).sum()
+            print("%.4f" % (n_ok / float(nq)), end=' ')
+        print()
+
+
+if __name__ == "__main__":
+    faiss.contrib.datasets.dataset_basedir = '/home/aguzhva/ANN_SIFT1B/'
+
+    # represents OPQ32_128,IVF65536_HNSW32,PQ32 index
+    index_file_1 = "/home/aguzhva/ANN_SIFT1B/run_tests/bench_ivf/indexes/hnsw32/.faissindex"
+
+    nprobe_values = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
+    quantizer_efsearch_values = [4, 8, 16, 32, 64, 128, 256, 512]
+    ht_values = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 256]
+
+    # represents OPQ32_128,IVF65536(IVF256,PQHDx4fs,RFlat),PQ32 index
+    index_file_2 = "/home/aguzhva/ANN_SIFT1B/run_tests/bench_ivf/indexes/pq4/.faissindex"
+
+    quantizer_k_factor_rf_values = [1, 2, 4, 8, 16, 32, 64]
+    quantizer_nprobe_values = [1, 2, 4, 8, 16, 32, 64, 128]
+
+    # test the first index
+    index_parameters_1 = []
+    for _ in range(0, 20):
+        nprobe = random.choice(nprobe_values)
+        quantizer_efsearch = random.choice(quantizer_efsearch_values)
+        ht = random.choice(ht_values)
+        index_parameters_1.append(
+            "nprobe={},quantizer_efSearch={},ht={}".format(
+                nprobe,
+                quantizer_efsearch,
+                ht)
+        )
+
+    test_bigann10m(index_file_1, index_parameters_1)
+
+    # test the second index
+    index_parameters_2 = []
+    for _ in range(0, 20):
+        nprobe = random.choice(nprobe_values)
+        quantizer_k_factor_rf = random.choice(quantizer_k_factor_rf_values)
+        quantizer_nprobe = random.choice(quantizer_nprobe_values)
+        ht = random.choice(ht_values)
+        index_parameters_2.append(
+            "nprobe={},quantizer_k_factor_rf={},quantizer_nprobe={},ht={}".format(
+                nprobe,
+                quantizer_k_factor_rf,
+                quantizer_nprobe,
+                ht)
+        )
+
+    test_bigann10m(index_file_2, index_parameters_2)
diff --git a/faiss/impl/ProductQuantizer.cpp b/faiss/impl/ProductQuantizer.cpp
@@ -237,12 +237,26 @@ void compute_code(const ProductQuantizer& pq, const float* x, uint8_t* code) {
     for (size_t m = 0; m < pq.M; m++) {
         const float* xsub = x + m * pq.dsub;
 
-        uint64_t idxm = fvec_L2sqr_ny_nearest(
-                distances.data(),
-                xsub,
-                pq.get_centroids(m, 0),
-                pq.dsub,
-                pq.ksub);
+        uint64_t idxm = 0;
+        if (pq.transposed_centroids.empty()) {
+            // the regular version
+            idxm = fvec_L2sqr_ny_nearest(
+                    distances.data(),
+                    xsub,
+                    pq.get_centroids(m, 0),
+                    pq.dsub,
+                    pq.ksub);
+        } else {
+            // transposed centroids are available, use them
+            idxm = fvec_L2sqr_ny_nearest_y_transposed(
+                    distances.data(),
+                    xsub,
+                    pq.transposed_centroids.data() + m * pq.ksub,
+                    pq.centroids_sq_lengths.data() + m * pq.ksub,
+                    pq.dsub,
+                    pq.M * pq.ksub,
+                    pq.ksub);
+        }
 
         encoder.encode(idxm);
     }
@@ -819,4 +833,32 @@ void ProductQuantizer::search_sdc(
     }
 }
 
+void ProductQuantizer::sync_transposed_centroids() {
+    transposed_centroids.resize(d * ksub);
+    centroids_sq_lengths.resize(ksub * M);
+
+    for (size_t mi = 0; mi < M; mi++) {
+        for (size_t ki = 0; ki < ksub; ki++) {
+            float sqlen = 0;
+
+            for (size_t di = 0; di < dsub; di++) {
+                const float q = centroids[(mi * ksub + ki) * dsub + di];
+
+                transposed_centroids[(di * M + mi) * ksub + ki] = q;
+                sqlen += q * q;
+            }
+
+            centroids_sq_lengths[mi * ksub + ki] = sqlen;
+        }
+    }
+}
+
+void ProductQuantizer::clear_transposed_centroids() {
+    transposed_centroids.clear();
+    transposed_centroids.shrink_to_fit();
+
+    centroids_sq_lengths.clear();
+    centroids_sq_lengths.shrink_to_fit();
+}
+
 } // namespace faiss
diff --git a/faiss/impl/ProductQuantizer.h b/faiss/impl/ProductQuantizer.h
@@ -49,9 +49,18 @@ struct ProductQuantizer : Quantizer {
     /// d / M)
     Index* assign_index;
 
-    /// Centroid table, size M * ksub * dsub
+    /// Centroid table, size M * ksub * dsub.
+    /// Layout: (M, ksub, dsub)
     std::vector<float> centroids;
 
+    /// Transposed centroid table, size M * ksub * dsub.
+    /// Layout: (dsub, M, ksub)
+    std::vector<float> transposed_centroids;
+
+    /// Squared lengths of centroids, size M * ksub
+    /// Layout: (M, ksub)
+    std::vector<float> centroids_sq_lengths;
+
     /// return the centroids associated with subvector m
     float* get_centroids(size_t m, size_t i) {
         return &centroids[(m * ksub + i) * dsub];
@@ -165,6 +174,13 @@ struct ProductQuantizer : Quantizer {
             const size_t ncodes,
             float_maxheap_array_t* res,
             bool init_finalize_heap = true) const;
+
+    /// Sync transposed centroids with regular centroids. This call
+    /// is needed if centroids were edited directly.
+    void sync_transposed_centroids();
+
+    /// Clear the transposed centroids table so that it is no longer used.
+    void clear_transposed_centroids();
 };
 
 // block size used in ProductQuantizer::compute_codes
diff --git a/faiss/utils/distances.h b/faiss/utils/distances.h
@@ -83,6 +83,19 @@ size_t fvec_L2sqr_ny_nearest(
         size_t d,
         size_t ny);
 
+/* compute ny square L2 distances between x and a set of transposed contiguous
+   y vectors and return the index of the nearest vector; return 0 if ny == 0.
+   squared lengths of the y vectors (y_sqlen) should be provided as well.
+   NOTE(review): d_offset looks like the stride between dimensions of y. */
+size_t fvec_L2sqr_ny_nearest_y_transposed(
+        float* distances_tmp_buffer,
+        const float* x,
+        const float* y,
+        const float* y_sqlen,
+        size_t d,
+        size_t d_offset,
+        size_t ny);
+
 /** squared norm of a vector */
 float fvec_norm_L2sqr(const float* x, size_t d);
 
diff --git a/faiss/utils/distances_simd.cpp b/faiss/utils/distances_simd.cpp
diff --git a/tests/test_product_quantizer.py b/tests/test_product_quantizer.py