Skip to content

Commit 0c588d7

Browse files
authored
Add support for compressed vector search to IVF (#184)
- Add support for IVF search with SQData
- Add Python tests for IVF
- Enable IVF tests in CI
1 parent 3d27bda commit 0c588d7

File tree

15 files changed

+516
-34
lines changed

15 files changed

+516
-34
lines changed

.github/workflows/build-linux.yml

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,11 +30,12 @@ concurrency:
3030

3131
jobs:
3232
build:
33-
name: ${{ matrix.cxx }}, ${{ matrix.build_type }}
33+
name: ${{ matrix.cxx }}, ${{ matrix.build_type }}, ivf=${{ matrix.ivf }}
3434
runs-on: ubuntu-22.04
3535
strategy:
3636
matrix:
3737
build_type: [RelWithDebugInfo]
38+
ivf: [OFF, ON]
3839
cxx: [g++-11, g++-12, clang++-15]
3940
include:
4041
- cxx: g++-11
@@ -43,6 +44,9 @@ jobs:
4344
cc: gcc-12
4445
- cxx: clang++-15
4546
cc: clang-15
47+
exclude:
48+
- cxx: g++-12
49+
ivf: ON
4650

4751
steps:
4852
- uses: actions/checkout@v5
@@ -69,7 +73,8 @@ jobs:
6973
-DSVS_BUILD_TESTS=YES \
7074
-DSVS_BUILD_EXAMPLES=YES \
7175
-DSVS_EXPERIMENTAL_LEANVEC=YES \
72-
-DSVS_NO_AVX512=NO
76+
-DSVS_NO_AVX512=NO \
77+
-DSVS_EXPERIMENTAL_ENABLE_IVF=${{ matrix.ivf }}
7378
7479
- name: Build Tests and Utilities
7580
working-directory: ${{ runner.temp }}/build

benchmark/include/svs-benchmark/ivf/test.h

Lines changed: 1 addition & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -48,9 +48,6 @@ struct IVFTest {
4848
std::filesystem::path graph_;
4949
std::filesystem::path queries_f32_;
5050
size_t queries_in_training_set_;
51-
// Backend-specific members
52-
std::filesystem::path leanvec_data_matrix_;
53-
std::filesystem::path leanvec_query_matrix_;
5451
// Runtime values
5552
size_t num_threads_;
5653

@@ -62,9 +59,6 @@ struct IVFTest {
6259
std::filesystem::path graph,
6360
std::filesystem::path queries_f32,
6461
size_t queries_in_training_set,
65-
// backend-specific members
66-
std::filesystem::path leanvec_data_matrix,
67-
std::filesystem::path leanvec_query_matrix,
6862
// Runtime values
6963
size_t num_threads
7064
)
@@ -74,8 +68,6 @@ struct IVFTest {
7468
, graph_{std::move(graph)}
7569
, queries_f32_{std::move(queries_f32)}
7670
, queries_in_training_set_{queries_in_training_set}
77-
, leanvec_data_matrix_{std::move(leanvec_data_matrix)}
78-
, leanvec_query_matrix_{std::move(leanvec_query_matrix)}
7971
, num_threads_{num_threads} {}
8072

8173
static IVFTest example() {
@@ -86,8 +78,6 @@ struct IVFTest {
8678
"path/to/graph", // graph
8779
"path/to/queries_f32", // queries_f32
8880
10000, // queries_in_training_set
89-
"path/to/leanvec_data_matrix", // LeanVec data matrix
90-
"path/to/leanvec_query_matrix", // LeanVec query matrix
9181
0, // Num Threads (not-saved)
9282
};
9383
}
@@ -113,9 +103,7 @@ struct IVFTest {
113103
SVS_LIST_SAVE_(index_config),
114104
SVS_LIST_SAVE_(graph),
115105
SVS_LIST_SAVE_(queries_f32),
116-
SVS_LIST_SAVE_(queries_in_training_set),
117-
SVS_LIST_SAVE_(leanvec_data_matrix),
118-
SVS_LIST_SAVE_(leanvec_query_matrix)}
106+
SVS_LIST_SAVE_(queries_in_training_set)}
119107
);
120108
}
121109

@@ -131,8 +119,6 @@ struct IVFTest {
131119
svsbenchmark::extract_filename(table, "graph", root),
132120
svsbenchmark::extract_filename(table, "queries_f32", root),
133121
SVS_LOAD_MEMBER_AT_(table, queries_in_training_set),
134-
svsbenchmark::extract_filename(table, "leanvec_data_matrix", root),
135-
svsbenchmark::extract_filename(table, "leanvec_query_matrix", root),
136122
num_threads};
137123
}
138124
};

bindings/python/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,6 @@ if (SVS_EXPERIMENTAL_ENABLE_IVF)
4747
)
4848
endif()
4949

50-
5150
set(LIB_NAME "_svs")
5251
pybind11_add_module(${LIB_NAME} MODULE ${CPP_FILES})
5352
target_link_libraries(${LIB_NAME} PRIVATE pybind11::module)

bindings/python/include/svs/python/ivf.h

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -51,14 +51,9 @@ template <typename F> void for_standard_specializations(F&& f) {
5151
// Pattern:
5252
// QueryType, DataType, Dimensionality, Enable Building
5353
// clang-format off
54-
X(float, svs::BFloat16, 512, EnableBuild::FromFileAndArray);
55-
56-
XN(float, float, 512);
57-
XN(float, svs::Float16, 512);
58-
5954
X(float, svs::BFloat16, Dynamic, EnableBuild::FromFileAndArray);
60-
XN(float, float, Dynamic);
61-
XN(float, svs::Float16, Dynamic);
55+
X(float, float, Dynamic, EnableBuild::FromFileAndArray);
56+
X(float, svs::Float16, Dynamic, EnableBuild::FromFileAndArray);
6257
// clang-format on
6358
#undef XN
6459
#undef X

bindings/python/setup.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@
2222
cmake_args = [
2323
# Export compile commands to allow us to explore compiler flags as needed.
2424
"-DCMAKE_EXPORT_COMPILE_COMMANDS=YES",
25+
"-DSVS_EXPERIMENTAL_ENABLE_IVF=YES ",
26+
"-DSVS_EXPERIMENTAL_BUILD_CUSTOM_MKL=YES ",
2527
]
2628

2729
# Determine the root of the repository

bindings/python/src/ivf.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -521,7 +521,7 @@ void wrap(py::module& m) {
521521
py::arg("num_centroids") = 1000,
522522
py::arg("minibatch_size") = 10'000,
523523
py::arg("num_iterations") = 10,
524-
py::arg("is_hierarchical") = false,
524+
py::arg("is_hierarchical") = true,
525525
py::arg("training_fraction") = 0.1,
526526
py::arg("hierarchical_level1_clusters") = 0,
527527
py::arg("seed") = 0xc0ffee,

bindings/python/tests/common.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,12 @@
4040
test_groundtruth_cosine = str(TEST_DATASET_DIR.joinpath("groundtruth_cosine.ivecs"))
4141
test_vamana_reference = str(TEST_DATASET_DIR.joinpath("reference/vamana_reference.toml"))
4242

43+
test_ivf_clustering = str(TEST_DATASET_DIR.joinpath("ivf_clustering"))
44+
test_ivf_reference = str(TEST_DATASET_DIR.joinpath("reference/ivf_reference.toml"))
45+
4346
test_number_of_vectors = 10000
4447
test_dimensions = 128
48+
test_number_of_clusters = 128
4549

4650
#####
4751
##### Helper Functions

0 commit comments

Comments
 (0)