Skip to content

Commit 3d27bda

Browse files
authored
Add Git hook for clang-format with pre-commit (#187)
Also add CI job for checking format. Also format existing code.
1 parent 3d00216 commit 3d27bda

File tree

20 files changed

+160
-95
lines changed

20 files changed

+160
-95
lines changed

.github/workflows/pre-commit.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright 2024 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
name: pre-commit
16+
17+
on:
18+
pull_request:
19+
20+
jobs:
21+
pre-commit:
22+
runs-on: ubuntu-24.04
23+
steps:
24+
- uses: actions/checkout@v5
25+
- uses: actions/setup-python@v6
26+
with:
27+
python-version: '3.12'
28+
- uses: pre-commit/action@v3.0.1

.pre-commit-config.yaml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Copyright 2024 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
repos:
16+
- repo: local
17+
hooks:
18+
- id: clang-format
19+
name: clang-format
20+
language: python
21+
entry: tools/clang-format.sh
22+
args: [clang-format]
23+
additional_dependencies: ["clang-format>=15,<16"]

bindings/python/src/dynamic_vamana.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,7 @@ svs::DynamicVamana dynamic_vamana_build_uncompressed(
104104
size_t num_threads
105105
) {
106106
return svs::DynamicVamana::build<Q>(
107-
parameters,
108-
std::move(data_loader),
109-
ids,
110-
distance_type,
111-
num_threads
107+
parameters, std::move(data_loader), ids, distance_type, num_threads
112108
);
113109
}
114110

examples/cpp/shared/example_vamana_with_compression.cpp

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,37 @@
2424
#include "svs/orchestrators/exhaustive.h"
2525
#include "svs/orchestrators/vamana.h"
2626

27-
2827
int main() {
29-
// STEP 1: Compress Data with LeanVec, reducing dimensionality to leanvec_dim dimensions and using
30-
// 4 and 8 bits for primary and secondary levels respectively.
31-
//! [Compress data]
28+
// STEP 1: Compress Data with LeanVec, reducing dimensionality to leanvec_dim dimensions
29+
// and using 4 and 8 bits for primary and secondary levels respectively.
30+
//! [Compress data]
3231
const size_t num_threads = 4;
3332
size_t padding = 32;
3433
size_t leanvec_dim = 64;
3534
auto threadpool = svs::threads::as_threadpool(num_threads);
36-
auto loaded = svs::VectorDataLoader<float>(std::filesystem::path(SVS_DATA_DIR) / "data_f32.svs").load();
37-
auto data = svs::leanvec::LeanDataset<svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<8>, svs::Dynamic, svs::Dynamic>::reduce(
38-
loaded, std::nullopt, threadpool, padding, svs::lib::MaybeStatic<svs::Dynamic>(leanvec_dim)
39-
);
35+
auto loaded =
36+
svs::VectorDataLoader<float>(std::filesystem::path(SVS_DATA_DIR) / "data_f32.svs")
37+
.load();
38+
auto data = svs::leanvec::LeanDataset<
39+
svs::leanvec::UsingLVQ<4>,
40+
svs::leanvec::UsingLVQ<8>,
41+
svs::Dynamic,
42+
svs::Dynamic>::
43+
reduce(
44+
loaded,
45+
std::nullopt,
46+
threadpool,
47+
padding,
48+
svs::lib::MaybeStatic<svs::Dynamic>(leanvec_dim)
49+
);
4050
//! [Compress data]
4151

42-
43-
// STEP 2: Build Vamana Index
52+
// STEP 2: Build Vamana Index
4453
//! [Index Build]
4554
auto parameters = svs::index::vamana::VamanaBuildParameters{};
46-
svs::Vamana index = svs::Vamana::build<float>(parameters, data, svs::distance::DistanceL2(), num_threads);
55+
svs::Vamana index = svs::Vamana::build<float>(
56+
parameters, data, svs::distance::DistanceL2(), num_threads
57+
);
4758
//! [Index Build]
4859

4960
// STEP 3: Search the Index
@@ -52,12 +63,15 @@ int main() {
5263
const size_t n_neighbors = 10;
5364
index.set_search_window_size(search_window_size);
5465

55-
auto queries = svs::load_data<float>(std::filesystem::path(SVS_DATA_DIR) / "queries_f32.fvecs");
66+
auto queries =
67+
svs::load_data<float>(std::filesystem::path(SVS_DATA_DIR) / "queries_f32.fvecs");
5668
auto results = index.search(queries, n_neighbors);
5769
//! [Perform Queries]
5870

5971
//! [Recall]
60-
auto groundtruth = svs::load_data<int>(std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs");
72+
auto groundtruth = svs::load_data<int>(
73+
std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs"
74+
);
6175
double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);
6276

6377
fmt::print("Recall@{} = {:.4f}\n", n_neighbors, recall);
@@ -66,15 +80,22 @@ int main() {
6680
// STEP 4: Saving and reloading the index
6781
//! [Saving Loading]
6882
index.save("config", "graph", "data");
69-
index = svs::Vamana::assemble<float>(
70-
"config", svs::GraphLoader("graph"), svs::lib::load_from_disk<svs::leanvec::LeanDataset<svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<8>, svs::Dynamic, svs::Dynamic>>("data", padding), svs::distance::DistanceL2(), num_threads
83+
index = svs::Vamana::assemble<float>(
84+
"config",
85+
svs::GraphLoader("graph"),
86+
svs::lib::load_from_disk<svs::leanvec::LeanDataset<
87+
svs::leanvec::UsingLVQ<4>,
88+
svs::leanvec::UsingLVQ<8>,
89+
svs::Dynamic,
90+
svs::Dynamic>>("data", padding),
91+
svs::distance::DistanceL2(),
92+
num_threads
7193
);
7294
//! [Saving Loading]
7395
index.set_search_window_size(search_window_size);
7496
recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);
7597

7698
fmt::print("Recall@{} after saving and reloading = {:.4f}\n", n_neighbors, recall);
7799

78-
79100
return 0;
80101
}

examples/cpp/shared/example_vamana_with_compression_lvq.cpp

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,25 @@
2424
#include "svs/orchestrators/exhaustive.h"
2525
#include "svs/orchestrators/vamana.h"
2626

27-
2827
int main() {
2928
// STEP 1: Compress Data with LVQ
3029
//! [Compress data]
3130
size_t padding = 32;
3231
const size_t num_threads = 4;
3332
auto threadpool = svs::threads::as_threadpool(num_threads);
34-
auto loaded = svs::VectorDataLoader<float>(std::filesystem::path(SVS_DATA_DIR) / "data_f32.svs").load();
35-
auto data = svs::quantization::lvq::LVQDataset<4, 8>::compress(loaded, threadpool, padding);
33+
auto loaded =
34+
svs::VectorDataLoader<float>(std::filesystem::path(SVS_DATA_DIR) / "data_f32.svs")
35+
.load();
36+
auto data =
37+
svs::quantization::lvq::LVQDataset<4, 8>::compress(loaded, threadpool, padding);
3638
//! [Compress data]
3739

38-
// STEP 2: Build Vamana Index
40+
// STEP 2: Build Vamana Index
3941
//! [Index Build]
4042
auto parameters = svs::index::vamana::VamanaBuildParameters{};
41-
svs::Vamana index = svs::Vamana::build<float>(parameters, data, svs::distance::DistanceL2(), num_threads);
43+
svs::Vamana index = svs::Vamana::build<float>(
44+
parameters, data, svs::distance::DistanceL2(), num_threads
45+
);
4246
//! [Index Build]
4347

4448
// STEP 3: Search the Index
@@ -47,12 +51,15 @@ int main() {
4751
const size_t n_neighbors = 10;
4852
index.set_search_window_size(search_window_size);
4953

50-
auto queries = svs::load_data<float>(std::filesystem::path(SVS_DATA_DIR) / "queries_f32.fvecs");
54+
auto queries =
55+
svs::load_data<float>(std::filesystem::path(SVS_DATA_DIR) / "queries_f32.fvecs");
5156
auto results = index.search(queries, n_neighbors);
5257
//! [Perform Queries]
5358

5459
//! [Recall]
55-
auto groundtruth = svs::load_data<int>(std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs");
60+
auto groundtruth = svs::load_data<int>(
61+
std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs"
62+
);
5663
double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);
5764

5865
fmt::print("Recall@{} = {:.4f}\n", n_neighbors, recall);
@@ -62,14 +69,17 @@ int main() {
6269
//! [Saving Loading]
6370
index.save("config", "graph", "data");
6471
index = svs::Vamana::assemble<float>(
65-
"config", svs::GraphLoader("graph"), svs::lib::load_from_disk<svs::quantization::lvq::LVQDataset<4, 8>>("data", padding), svs::distance::DistanceL2(), num_threads
72+
"config",
73+
svs::GraphLoader("graph"),
74+
svs::lib::load_from_disk<svs::quantization::lvq::LVQDataset<4, 8>>("data", padding),
75+
svs::distance::DistanceL2(),
76+
num_threads
6677
);
6778
//! [Saving Loading]
6879
index.set_search_window_size(search_window_size);
6980
recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);
7081

7182
fmt::print("Recall@{} after saving and reloading = {:.4f}\n", n_neighbors, recall);
7283

73-
7484
return 0;
7585
}

include/svs/index/ivf/common.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -688,9 +688,6 @@ std::vector<std::vector<I>> group_assignments(
688688
return clusters;
689689
}
690690

691-
692-
693-
694691
template <typename Query, typename Dist, typename MatMulResults, typename Buffer>
695692
void search_centroids(
696693
const Query& query,

include/svs/index/ivf/hierarchical_kmeans.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
// stdlib
2222
#include <cmath>
2323

24-
2524
namespace svs::index::ivf {
2625

2726
/// @brief Calculate the number of level 2 clusters for each level 1 cluster
@@ -104,8 +103,9 @@ auto hierarchical_kmeans_clustering_impl(
104103
}
105104
auto rng = std::mt19937(parameters.seed_);
106105
std::vector<size_t> v(num_training_data);
107-
auto data_train =
108-
make_training_set<BuildType, Data, Alloc>(data, v, num_training_data, rng, threadpool);
106+
auto data_train = make_training_set<BuildType, Data, Alloc>(
107+
data, v, num_training_data, rng, threadpool
108+
);
109109

110110
// Step 2: Init centroids for level 1
111111
v.resize(num_level1_clusters);
@@ -134,7 +134,8 @@ auto hierarchical_kmeans_clustering_impl(
134134

135135
// Step 4: Assign training data to clusters
136136
auto data_norm = maybe_compute_norms<Distance>(data_train, threadpool);
137-
auto centroids_level1_norm = maybe_compute_norms<Distance>(centroids_level1_fp32, threadpool);
137+
auto centroids_level1_norm =
138+
maybe_compute_norms<Distance>(centroids_level1_fp32, threadpool);
138139

139140
for (size_t batch = 0; batch < num_batches; ++batch) {
140141
auto this_batch = threads::UnitRange{

include/svs/index/ivf/kmeans.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,9 @@ auto kmeans_clustering_impl(
5757
}
5858
auto rng = std::mt19937(parameters.seed_);
5959
std::vector<size_t> v(num_training_data);
60-
auto data_train =
61-
make_training_set<BuildType, Data, Alloc>(data, v, num_training_data, rng, threadpool);
60+
auto data_train = make_training_set<BuildType, Data, Alloc>(
61+
data, v, num_training_data, rng, threadpool
62+
);
6263

6364
// Step 2: Init centroids by randomly selecting from training set
6465
v.resize(num_centroids);

include/svs/index/vamana/dynamic_search_buffer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,8 @@ template <typename Idx, typename Cmp = std::less<>> class MutableBuffer {
580580
/// If the number of valid candidates is *less* than the target, a negative number
581581
/// is returned.
582582
int64_t slack() const {
583-
return lib::narrow_cast<int64_t>(valid()) - lib::narrow_cast<int64_t>(target_capacity());
583+
return lib::narrow_cast<int64_t>(valid()) -
584+
lib::narrow_cast<int64_t>(target_capacity());
584585
}
585586

586587
/// Return the index of the first preceding valid candidate beginning at the provided

include/svs/lib/misc.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -446,16 +446,14 @@ using DefaultPredicate = std::function<bool()>;
446446
///
447447
/// Returns true if at least one element of the span is NaN.
448448
///
449-
template <typename T, size_t N>
450-
bool contains_nan(std::span<const T, N> data) {
449+
template <typename T, size_t N> bool contains_nan(std::span<const T, N> data) {
451450
return std::any_of(data.begin(), data.end(), [](T v) { return std::isnan(v); });
452451
}
453452

454453
///
455454
/// Returns true if all elements of the span are NaN.
456455
///
457-
template <typename T, size_t N>
458-
bool all_nan(std::span<const T, N> data) {
456+
template <typename T, size_t N> bool all_nan(std::span<const T, N> data) {
459457
return std::all_of(data.begin(), data.end(), [](T v) { return std::isnan(v); });
460458
}
461459

0 commit comments

Comments
 (0)