Skip to content

Commit 9f02344

Browse files
committed
The first version
1 parent 8b0c66a commit 9f02344

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+3305
-191
lines changed

.editorconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,6 @@ indent_size = 2
2929
[*.{yaml,yml}]
3030
indent_size = 2
3131
32+
# Python files
33+
[*.py]
34+
max_line_length = 120

.gitattributes

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,7 @@
5151
*.out filter=lfs diff=lfs merge=lfs -text
5252
*.a filter=lfs diff=lfs merge=lfs -text
5353
*.o filter=lfs diff=lfs merge=lfs -text
54+
55+
# Exclude files from language stats (GitHub Linguist)
56+
*.ipynb linguist-vendored
57+

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,5 @@ Cargo.lock
7979
# Miscellaneous files and directories to ignore
8080
# Add any additional file patterns and directory names that should be ignored down here
8181
.DS_Store
82+
benchmark_results.csv
83+
eval_*.csv

Cargo.toml

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,43 @@
11
[package]
22
name = "vq"
3-
version = "0.1.1"
3+
version = "0.1.3"
44
description = "A vector quantization library for Rust"
55
repository = "https://github.com/habedi/vq"
66
license = "MIT OR Apache-2.0"
77
readme = "README.md"
8-
keywords = ["vq", "vector-quantization", "clustering", "nearest-neighbor", "data-compression"]
8+
keywords = ["vector-quantization", "quantization", "nearest-neighbor", "data-compression", "embeddings"]
99
authors = ["Hassan Abedi <hassan.abedi.t@gmail.com>"]
1010
homepage = "https://github.com/habedi/vq"
1111
documentation = "https://docs.rs/vq"
12-
#categories = ["development-tools"]
12+
categories = ["algorithms", "compression", "data-structures"]
1313
edition = "2021"
14+
rust-version = "1.83"
15+
16+
include = [
17+
"src/*",
18+
"Cargo.toml",
19+
"README.md",
20+
"LICENSE-MIT",
21+
"LICENSE-APACHE"
22+
]
1423

1524
[lib]
1625
name = "vq"
1726
path = "src/lib.rs"
1827

28+
[features]
29+
default = [] # No features enabled by default
30+
binaries = []
31+
1932
[[bin]]
20-
name = "vq"
21-
path = "src/main.rs"
33+
name = "eval"
34+
path = "src/bin/eval.rs"
35+
required-features = ["binaries"]
36+
37+
[[bin]]
38+
name = "vq-examples"
39+
path = "src/bin/vq_examples.rs"
40+
required-features = ["binaries"]
2241

2342
[dependencies]
2443
tracing = "0.1.41"
@@ -28,10 +47,14 @@ rand = "0.9.0"
2847
half = "2.4.1"
2948
nalgebra = "0.33.2"
3049
rayon = "1.10"
50+
anyhow = "1.0.95"
51+
rand_distr = "0.5.0"
52+
serde = { version = "1.0.217", features = ["derive"] }
53+
clap = { version = "4.5.29", features = ["derive"] }
3154

3255
[dev-dependencies]
3356
criterion = { version = "0.5", features = ["html_reports"] }
3457

3558
[[bench]]
36-
name = "my_benchmarks"
59+
name = "main"
3760
harness = false

Makefile

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# Variables
22
PKG = github.com/habedi/vq
3-
BINARY_NAME = $(or $(PROJ_BINARY), $(notdir $(PKG)))
4-
BINARY = target/release/$(BINARY_NAME)
3+
BINARY_NAME = $(or $(PROJ_BINARY), $(notdir $(PKG)-examples))
4+
BINARY = $(BINARY_NAME)
55
PATH := /snap/bin:$(PATH)
66
CARGO_TERM_COLOR = always
7+
RUST_BACKTRACE = 1
8+
RUST_LOG = info
79
DEBUG_VQ = 0
810

911
# Default target
@@ -16,64 +18,77 @@ help: ## Show this help message
1618
.PHONY: format
1719
format: ## Format Rust files
1820
@echo "Formatting Rust files..."
19-
cargo fmt
21+
@cargo fmt
2022

2123
.PHONY: test
2224
test: format ## Run tests
2325
@echo "Running tests..."
24-
DEBUG_VQ=$(DEBUG_VQ) cargo test -- --nocapture
26+
@DEBUG_VQ=$(DEBUG_VQ) cargo test -- --nocapture
2527

2628
.PHONY: coverage
2729
coverage: format ## Generate test coverage report
2830
@echo "Generating test coverage report..."
29-
DEBUG_VQ=$(DEBUG_VQ) cargo tarpaulin --out Xml --out Html
31+
@DEBUG_VQ=$(DEBUG_VQ) cargo tarpaulin --out Xml --out Html
3032

3133
.PHONY: build
3234
build: format ## Build the binary for the current platform
3335
@echo "Building the project..."
34-
DEBUG_VQ=$(DEBUG_VQ) cargo build --release
36+
@DEBUG_VQ=$(DEBUG_VQ) cargo build --release --features binaries
3537

3638
.PHONY: run
3739
run: build ## Build and run the binary
3840
@echo "Running the $(BINARY) binary..."
39-
DEBUG_VQ=$(DEBUG_VQ) ./$(BINARY)
41+
@DEBUG_VQ=$(DEBUG_VQ) cargo run --release --features binaries --bin $(BINARY)
4042

4143
.PHONY: clean
4244
clean: ## Remove generated and temporary files
4345
@echo "Cleaning up..."
44-
cargo clean
46+
@cargo clean
47+
@rm -f benchmark_results.csv
48+
@rm -f eval_*.csv
4549

4650
.PHONY: install-snap
4751
install-snap: ## Install a few dependencies using Snapcraft
4852
@echo "Installing the snap package..."
49-
sudo apt-get update
50-
sudo apt-get install -y snapd
51-
sudo snap refresh
52-
sudo snap install rustup --classic
53+
@sudo apt-get update
54+
@sudo apt-get install -y snapd
55+
@sudo snap refresh
56+
@sudo snap install rustup --classic
5357

5458
.PHONY: install-deps
5559
install-deps: install-snap ## Install development dependencies
5660
@echo "Installing dependencies..."
57-
rustup component add rustfmt clippy
58-
cargo install cargo-tarpaulin
59-
cargo install cargo-audit
61+
@rustup component add rustfmt clippy
62+
@cargo install cargo-tarpaulin
63+
@cargo install cargo-audit
6064

6165
.PHONY: lint
6266
lint: format ## Run linters on Rust files
6367
@echo "Linting Rust files..."
64-
DEBUG_VQ=$(DEBUG_VQ) cargo clippy -- -D warnings
68+
@DEBUG_VQ=$(DEBUG_VQ) cargo clippy -- -D warnings
6569

6670
.PHONY: publish
6771
publish: ## Publish the package to crates.io (requires CARGO_REGISTRY_TOKEN to be set)
6872
@echo "Publishing the package to Cargo registry..."
69-
cargo publish --token $(CARGO_REGISTRY_TOKEN)
73+
@cargo publish --token $(CARGO_REGISTRY_TOKEN)
7074

7175
.PHONY: bench
7276
bench: ## Run benchmarks
7377
@echo "Running benchmarks..."
74-
DEBUG_VQ=$(DEBUG_VQ) cargo bench
78+
@DEBUG_VQ=$(DEBUG_VQ) cargo bench
7579

76-
.PHONY: audit
77-
audit: ## Run security audit on Rust dependencies
78-
@echo "Running security audit..."
79-
cargo audit
80+
.PHONY: eval
81+
eval: ## Evaluate an implementation (the ALG should be the algorithm name, e.g., bq, sq, pq, opq, tsvq, rvq)
82+
@echo && if [ -z "$(ALG)" ]; then echo "Please provide the ALG argument"; exit 1; fi
83+
@echo "Evaluating implementation with argument: $(ALG)"
84+
@cargo run --release --features binaries --bin eval -- --eval $(ALG)
85+
86+
.PHONY: eval-all
87+
eval-all: ## Evaluate all the implementations (bq, sq, pq, opq, tsvq, rvq)
88+
@echo "Evaluating all implementations..."
89+
@make eval ALG=bq
90+
@make eval ALG=sq
91+
@make eval ALG=pq
92+
@make eval ALG=opq
93+
@make eval ALG=tsvq
94+
@make eval ALG=rvq

README.md

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,53 +6,57 @@
66
[<img alt="Crates.io" src="https://img.shields.io/crates/v/vq.svg?style=for-the-badge&color=fc8d62&logo=rust" height="20">](https://crates.io/crates/vq)
77
[<img alt="Docs.rs" src="https://img.shields.io/badge/docs.rs-vq-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs" height="20">](https://docs.rs/vq)
88
[<img alt="Downloads" src="https://img.shields.io/crates/d/vq?style=for-the-badge&labelColor=555555&logo=rust" height="20">](https://crates.io/crates/vq)
9+
[<img alt="MSRV" src="https://img.shields.io/badge/MSRV-1.83.0-orange?style=for-the-badge&labelColor=555555&logo=rust" height="20">](https://github.com/rust-lang/rust/releases/tag/1.83.0)
10+
<br>
911
[<img alt="Docs" src="https://img.shields.io/badge/docs-latest-3776ab?style=for-the-badge&labelColor=555555&logo=readthedocs" height="20">](docs)
1012
[<img alt="License" src="https://img.shields.io/badge/license-MIT%2FApache--2.0-007ec6?style=for-the-badge&labelColor=555555&logo=open-source-initiative" height="20">](https://github.com/habedi/vq)
1113

12-
Vq (**v**[ector] **q**[uantiztion]) is a Rust library that implements several
14+
Vq (**v**[ector] **q**[uantizer]) is a Rust library that implements several
1315
popular [vector quantization](https://en.wikipedia.org/wiki/Vector_quantization) algorithms including binary, scalar,
1416
and product quantization algorithms.
15-
It provides a simple, efficient API for data compression that help reduce memory usage and computational overhead.
17+
It provides a simple, efficient API for data compression that helps reduce memory usage and computational overhead.
1618

1719
## Features
1820

1921
- Implemented Algorithms:
20-
- [**Binary Quantization (BQ)**](src/bq.rs)
21-
- [**Scalar Quantization (SQ)**](src/sq.rs)
22-
- [**Product Quantization (PQ)**](https://ieeexplore.ieee.org/document/5432202)
23-
- [**Optimized Product Quantization (OPQ)**](https://ieeexplore.ieee.org/document/6619223)
24-
- [**Tree-structured Vector Quantization (TSVQ)**](https://ieeexplore.ieee.org/document/515493)
25-
- [**Residual Vector Quantization (RVQ)**](https://pmc.ncbi.nlm.nih.gov/articles/PMC3231071/)
22+
- [Binary Quantization (BQ)](src/bq.rs)
23+
- [Scalar Quantization (SQ)](src/sq.rs)
24+
- [Product Quantization (PQ)](https://ieeexplore.ieee.org/document/5432202)
25+
- [Optimized Product Quantization (OPQ)](https://ieeexplore.ieee.org/document/6619223)
26+
- [Tree-structured Vector Quantization (TSVQ)](https://ieeexplore.ieee.org/document/515493)
27+
- [Residual Vector Quantization (RVQ)](https://pmc.ncbi.nlm.nih.gov/articles/PMC3231071/)
2628

2729
- Parallelized vector operations for large vectors using [Rayon](https://crates.io/crates/rayon).
28-
- Flexible quantization algorithm implementations that support custom distance functions (e.g., Euclidean, Cosine,
29-
Chebyshev, etc.).
30-
- Support for quantizing vectors of `f32` to `f16` (using [half](https://github.com/starkat99/half-rs/tree/main/src)) or `u8` data types.
31-
- Simple and intuitive API for all quantization algorithms.
30+
- Flexible quantization algorithm implementations that support using various distance metrics such as Euclidean, Cosine,
31+
and Manhattan distances.
32+
- Support for quantizing vectors of `f32` to `f16` (using [half](https://crates.io/crates/half)) or `u8` data types.
33+
- Simple, intuitive, and uniform API for all quantization algorithms.
3234

3335
## Installation
3436

3537
```bash
3638
cargo add vq
3739
```
3840

41+
*Vq requires Rust 1.83 or later.*
42+
3943
## Documentation
4044

41-
Find the latest documentation [here](docs) or on [docs.rs](https://docs.rs/vq).
45+
Find the latest documentation on [docs.rs](https://docs.rs/vq).
4246

43-
Check out the [tests](tests/) directory for detailed examples of using Vq.
47+
Check out [vq_examples.rs](src/bin/vq_examples.rs) and the [tests](tests/) directory for detailed examples of using Vq.
4448

4549
### Quick Example
4650

47-
Here's a simple example using the scalar quantization:
51+
Here's a simple example using the SQ algorithm to quantize a vector:
4852

4953
```rust
5054
use vq::sq::ScalarQuantizer;
5155
use vq::vector::Vector;
5256

5357
fn main() {
5458
// Create a scalar quantizer for values in the range [0.0, 1.0] with 256 levels.
55-
let quantizer = ScalarQuantizer::new(0.0, 1.0, 256);
59+
let quantizer = ScalarQuantizer::fit(0.0, 1.0, 256);
5660

5761
// Create an input vector.
5862
let input = Vector::new(vec![0.1, 0.5, -0.8, -0.3, 0.9]);
@@ -64,9 +68,22 @@ fn main() {
6468
}
6569
```
6670

71+
## Performance
72+
73+
Check out the [notebooks](notebooks/) directory for information on how to evaluate the performance of the implemented
74+
algorithms.
75+
Additionally, see the contents of the [src/bin](src/bin/) directory for the scripts used for the evaluation.
76+
77+
> On a ThinkPad T14 laptop with an Intel i7-1355U CPU and 32GB of RAM, the performance of the PQ algorithm for
78+
> quantizing one million vectors of 128 dimensions (into 16 subspaces with 256 centroids per subspace) is as follows:
79+
> - Training Time: 232.5 seconds
80+
> - Quantization Time: 34.1 seconds
81+
> - Reconstruction Error: 0.02
82+
> - Recall@10: 0.19
83+
6784
## Contributing
6885

69-
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to contribute.
86+
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details on contributing.
7087

7188
## License
7289

0 commit comments

Comments
 (0)