Skip to content

Commit 46d8105

Browse files
authored
Adding C++ and Python usage examples. (#185)
Adding C++ example to build a dynamic index with vector compression (LeanVec) and a Python example showing how to build a static index with LVQ.
1 parent 0c588d7 commit 46d8105

File tree

4 files changed

+290
-0
lines changed

4 files changed

+290
-0
lines changed

examples/cpp/shared/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,4 @@ endfunction()
4747
# Example executables built from this directory.
# NOTE(review): create_example_executable is defined above this hunk (not visible here);
# each call appears to pass a target name followed by its source file - confirm.
create_example_executable(shared shared.cpp)
4848
create_example_executable(example_vamana_with_compression_lvq example_vamana_with_compression_lvq.cpp)
4949
create_example_executable(example_vamana_with_compression example_vamana_with_compression.cpp)
50+
# Dynamic index example (vector insertions and deletions) with a LeanVec-compressed dataset.
create_example_executable(example_vamana_with_compression_dynamic example_vamana_with_compression_dynamic.cpp)

examples/cpp/shared/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
These examples utilize the LVQ and LeanVec interfaces, which are available when linking to an SVS shared/static library published with [releases](https://github.com/intel/ScalableVectorSearch/releases). Note that these examples will _not_ run after building the open source codebase without the shared/static library. These examples include:
1818
- [example_vamana_with_compression.cpp](./example_vamana_with_compression.cpp): Demonstrates building, searching, saving, and reloading an index with a LeanVec-compressed dataset.
1919
- [example_vamana_with_compression_lvq.cpp](./example_vamana_with_compression_lvq.cpp): Demonstrates building, searching, saving, and reloading an index with an LVQ-compressed dataset.
20+
- [example_vamana_with_compression_dynamic.cpp](./example_vamana_with_compression_dynamic.cpp): Demonstrates building, searching, saving, and reloading a dynamic index (allows vector insertions and deletions over time) with a LeanVec-compressed dataset.
2021

2122
See [CMakeLists.txt](./CMakeLists.txt) for details on linking to the SVS shared library and follow the commands below to compile and use the SVS shared library to run the shared.cpp example:
2223

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
/*
2+
* Copyright 2025 Intel Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
// SVS
18+
#include "svs/core/recall.h"
19+
#include "svs/extensions/flat/leanvec.h"
20+
#include "svs/extensions/flat/lvq.h"
21+
#include "svs/extensions/vamana/leanvec.h"
22+
#include "svs/extensions/vamana/lvq.h"
23+
#include "svs/orchestrators/dynamic_vamana.h"
24+
#include "svs/orchestrators/exhaustive.h"
25+
#include "svs/orchestrators/vamana.h"
26+
27+
// Alias for blocked Lean dataset that supports resize/compact
28+
using BlockedLean = svs::leanvec::LeanDataset<
29+
svs::leanvec::UsingLVQ<4>,
30+
svs::leanvec::UsingLVQ<8>,
31+
svs::Dynamic,
32+
svs::Dynamic,
33+
svs::data::Blocked<svs::HugepageAllocator<std::byte>>>;
34+
35+
int main() {
36+
// STEP 1: Compress Data with LeanVec, reducing dimensionality to leanvec_dim dimensions
37+
// and using 4 and 8 bits for primary and secondary levels respectively.
38+
//! [Compress data]
39+
const size_t num_threads = 4;
40+
size_t padding = 32;
41+
size_t leanvec_dim = 64;
42+
auto threadpool = svs::threads::as_threadpool(num_threads);
43+
auto loaded =
44+
svs::VectorDataLoader<float>(std::filesystem::path(SVS_DATA_DIR) / "data_f32.svs")
45+
.load();
46+
auto data = BlockedLean::reduce(
47+
loaded,
48+
std::nullopt,
49+
threadpool,
50+
padding,
51+
svs::lib::MaybeStatic<svs::Dynamic>(leanvec_dim)
52+
);
53+
//! [Compress data]
54+
55+
// STEP 2: Build Dynamic Vamana Index with initial set of vectors
56+
//! [Index Build]
57+
auto parameters = svs::index::vamana::VamanaBuildParameters{};
58+
59+
// Create id labels for build set
60+
std::vector<size_t> ids_build(loaded.size());
61+
for (size_t i = 0; i < loaded.size(); ++i) {
62+
ids_build[i] = i;
63+
}
64+
65+
svs::DynamicVamana index = svs::DynamicVamana::build<float>(
66+
parameters,
67+
data,
68+
svs::lib::as_span(ids_build),
69+
svs::distance::DistanceL2(),
70+
num_threads
71+
);
72+
//! [Index Build]
73+
74+
// STEP 3: Add and delete vectors as needed.
75+
//! [Delete vectors]
76+
size_t num_to_delete = 100;
77+
std::vector<size_t> ids_delete(num_to_delete);
78+
for (size_t i = 0; i < ids_delete.size(); ++i) {
79+
ids_delete[i] = i;
80+
}
81+
82+
fmt::print("Deleting {} vectors.\n", ids_delete.size());
83+
84+
index.delete_points(ids_delete);
85+
//! [Delete vectors]
86+
87+
//! [Add vectors]
88+
// Add the deleted vectors back in.
89+
auto points =
90+
svs::data::SimpleData<float, svs::Dynamic>(ids_delete.size(), loaded.dimensions());
91+
92+
size_t i = 0;
93+
for (const auto& j : ids_delete) {
94+
points.set_datum(i, loaded.get_datum(j));
95+
++i;
96+
}
97+
auto points_const_view = points.cview();
98+
99+
fmt::print("Adding {} vectors.\n", ids_delete.size());
100+
101+
index.add_points(points_const_view, svs::lib::as_span(ids_delete), num_threads);
102+
//! [Add vectors]
103+
104+
// STEP 4: Search the Index
105+
//! [Perform Queries]
106+
const size_t search_window_size = 50;
107+
const size_t n_neighbors = 10;
108+
index.set_search_window_size(search_window_size);
109+
110+
auto queries =
111+
svs::load_data<float>(std::filesystem::path(SVS_DATA_DIR) / "queries_f32.fvecs");
112+
auto results = index.search(queries, n_neighbors);
113+
//! [Perform Queries]
114+
115+
//! [Recall]
116+
auto groundtruth = svs::load_data<int>(
117+
std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs"
118+
);
119+
double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);
120+
121+
fmt::print("Recall@{} = {:.4f}\n", n_neighbors, recall);
122+
fmt::print(
123+
"Note that recall is low because this example is using a dummy random dataset.\n"
124+
);
125+
//! [Recall]
126+
127+
// STEP 5: Saving and reloading the index
128+
//! [Saving Loading]
129+
index.save("config", "graph", "data");
130+
index = svs::DynamicVamana::assemble<float>(
131+
"config",
132+
svs::GraphLoader("graph"),
133+
svs::lib::load_from_disk<BlockedLean>("data", padding),
134+
svs::distance::DistanceL2(),
135+
num_threads
136+
);
137+
//! [Saving Loading]
138+
index.set_search_window_size(search_window_size);
139+
recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);
140+
141+
fmt::print("Recall@{} after saving and reloading = {:.4f}\n", n_neighbors, recall);
142+
143+
return 0;
144+
}
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# Copyright 2025 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
# Import `unittest` to allow for automated testing.
17+
import unittest
18+
19+
# [imports]
20+
import os
21+
import svs
22+
# [imports]
23+
24+
# When True, `assert_equal` prints the compared values instead of asserting on them.
DEBUG_MODE = False


def assert_equal(lhs, rhs, message: str = "", epsilon = 0.05):
    """Check that `lhs` lies strictly within `epsilon` of `rhs`.

    In debug mode nothing is checked; the two values are printed instead.
    Otherwise an `AssertionError` carrying `message` is raised when `lhs` is not
    strictly inside the open interval (rhs - epsilon, rhs + epsilon).
    """
    if not DEBUG_MODE:
        upper_bound = rhs + epsilon
        lower_bound = rhs - epsilon
        assert lhs < upper_bound, message
        assert lhs > lower_bound, message
        return

    print(f"{message}: {lhs} == {rhs}")
31+
32+
test_data_dir = None
33+
34+
def run():
    """Build, query, save, and reload a Vamana index over an LVQ-compressed dataset.

    Generates a small test dataset on disk, builds the index from an `svs.LVQLoader`,
    runs the queries, checks the recall against the groundtruth, and finally saves the
    index and reloads it from the saved files.

    Side effect: stores the dataset directory in the module-level `test_data_dir` so
    that the unit test's `tearDown` knows which directory to remove.
    """
    # Without this declaration the assignment below would create a local variable and
    # the module-level `test_data_dir` would stay `None`, so nothing would be cleaned up.
    global test_data_dir

    # [generate-dataset]
    # Create a test dataset.
    # This will create a directory "example_data_vamana" and populate it with three
    # entries:
    # - data.fvecs: The test dataset.
    # - queries.fvecs: The test queries.
    # - groundtruth.ivecs: The groundtruth.
    test_data_dir = "./example_data_vamana"
    svs.generate_test_dataset(
        1000,                       # Create 1000 vectors in the dataset.
        100,                        # Generate 100 query vectors.
        256,                        # Set the vector dimensionality to 256.
        test_data_dir,              # The directory where results will be generated.
        data_seed = 1234,           # Random number seed for reproducibility.
        query_seed = 5678,          # Random number seed for reproducibility.
        num_threads = 4,            # Number of threads to use.
        distance = svs.DistanceType.L2,   # The distance type to use.
    )
    # [generate-dataset]

    # [create-loader]
    # We are going to construct an LVQ dataset on-the-fly from uncompressed data.
    # First, we construct a loader for the uncompressed data.
    uncompressed_loader = svs.VectorDataLoader(
        os.path.join(test_data_dir, "data.fvecs"),
        svs.DataType.float32
    )

    # Next - we construct an LVQLoader which is configured to use LVQ compression with 4
    # bits for the primary and 8 bits for the residual quantization.
    B1 = 4    # Number of bits for the first level LVQ quantization
    B2 = 8    # Number of bits for the residuals quantization
    compressed_loader = svs.LVQLoader(uncompressed_loader,
        primary=B1,
        residual=B2,
    )
    # [create-loader]

    # An index can be constructed using an LVQ dataset.
    # [build-parameters]
    parameters = svs.VamanaBuildParameters(
        graph_max_degree = 64,
        window_size = 128,
    )
    # [build-parameters]

    # [build-index]
    index = svs.Vamana.build(
        parameters,
        compressed_loader,
        svs.DistanceType.L2,
        num_threads = 4,
    )
    # [build-index]

    # Set the search window size of the index, load the queries, and perform the search.
    # [perform-queries]
    n_neighbors = 10
    index.search_window_size = 20
    index.num_threads = 4

    queries = svs.read_vecs(os.path.join(test_data_dir, "queries.fvecs"))
    I, D = index.search(queries, n_neighbors)
    # [perform-queries]

    # Compare with the groundtruth.
    # [recall]
    groundtruth = svs.read_vecs(os.path.join(test_data_dir, "groundtruth.ivecs"))
    recall = svs.k_recall_at(groundtruth, I, n_neighbors, n_neighbors)
    print(f"Recall = {recall}")
    # [recall]
    assert_equal(recall, 0.953)

    # Finally, we can save the index and reload from a previously saved set of files.
    # [saving-loading]
    index.save(
        os.path.join(test_data_dir, "example_config"),
        os.path.join(test_data_dir, "example_graph"),
        os.path.join(test_data_dir, "example_data"),
    )

    index = svs.Vamana(
        os.path.join(test_data_dir, "example_config"),
        os.path.join(test_data_dir, "example_graph"),
        os.path.join(test_data_dir, "example_data"),
        svs.DistanceType.L2,
        num_threads = 4,
    )
    # [saving-loading]


#####
##### Main Executable
#####

if __name__ == "__main__":
    run()

#####
##### As a unit test.
#####

class VamanaExampleTestCase(unittest.TestCase):
    """Runs the example end-to-end and removes the dataset directory afterwards."""

    def tearDown(self):
        """Remove the directory recorded by `run()`, if any, and reset the record."""
        # Imported locally so this cleanup does not depend on the file's import block.
        import shutil

        global test_data_dir
        if test_data_dir is not None:
            print(f"Removing temporary directory {test_data_dir}")
            # The directory holds the generated dataset and the saved index files, so a
            # plain `os.rmdir` (empty directories only) would fail here. Cleanup is
            # best-effort: errors are ignored so they cannot mask a test failure.
            shutil.rmtree(test_data_dir, ignore_errors=True)
            test_data_dir = None

    def test_all(self):
        """Execute the whole example; assertions inside `run()` decide pass/fail."""
        run()

0 commit comments

Comments
 (0)