CogitatorTech
diff --git a/‎.editorconfig‎
Lines changed: 3 additions & 0 deletions b/‎.editorconfig‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.gitattributes‎
Lines changed: 4 additions & 0 deletions b/‎.gitattributes‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.github/workflows/tests.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/tests.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 14 additions & 6 deletions b/‎Cargo.toml‎
Lines changed: 14 additions & 6 deletions
diff --git a/‎Makefile‎
Lines changed: 36 additions & 21 deletions b/‎Makefile‎
Lines changed: 36 additions & 21 deletions
diff --git a/‎README.md‎
Lines changed: 33 additions & 16 deletions b/‎README.md‎
Lines changed: 33 additions & 16 deletions
diff --git a/‎benches/bq_bench.rs‎
Lines changed: 106 additions & 0 deletions b/‎benches/bq_bench.rs‎
Lines changed: 106 additions & 0 deletions
@@ -29,3 +29,6 @@ indent_size = 2
 [*.{yaml,yml}]
 indent_size = 2
 
+# Python files
+[*.py]
+max_line_length = 120
@@ -51,3 +51,7 @@
 *.out filter=lfs diff=lfs merge=lfs -text
 *.a filter=lfs diff=lfs merge=lfs -text
 *.o filter=lfs diff=lfs merge=lfs -text
+
+# Exclude files from language stats (GitHub Linguist)
+*.ipynb linguist-vendored
+
@@ -11,7 +11,7 @@ jobs:
     strategy:
       matrix:
         # Define the Rust versions to test against
-        rust-version: [ "1.83.0", "1.84.0", "beta", "stable" ]
+        rust-version: [ "1.80.0", "1.83.0", "1.84.0", "beta", "stable" ]
 
     steps:
       - name: Checkout Repository
 
@@ -79,3 +79,5 @@ Cargo.lock
 # Miscellaneous files and directories to ignore
 # Add any additional file patterns a directory names that should be ignored down here
 .DS_Store
+benchmark_results.csv
+eval_*.csv
@@ -1,24 +1,28 @@
 [package]
 name = "vq"
-version = "0.1.1"
+version = "0.1.2"
 description = "A vector quantization library for Rust"
 repository = "https://github.com/habedi/vq"
 license = "MIT OR Apache-2.0"
 readme = "README.md"
-keywords = ["vq", "vector-quantization", "clustering", "nearest-neighbor", "data-compression"]
+keywords = ["vector-quantization", "quantization", "nearest-neighbor", "data-compression", "embeddings"]
 authors = ["Hassan Abedi <[email protected]>"]
 homepage = "https://github.com/habedi/vq"
 documentation = "https://docs.rs/vq"
-#categories = ["development-tools"]
+categories = ["algorithms", "compression", "data-structures"]
 edition = "2021"
 
 [lib]
 name = "vq"
 path = "src/lib.rs"
 
 [[bin]]
-name = "vq"
-path = "src/main.rs"
+name = "vq-examples"
+path = "src/bin/examples.rs"
+
+[[bin]]
+name = "eval"
+path = "src/bin/eval.rs"
 
 [dependencies]
 tracing = "0.1.41"
@@ -28,10 +32,14 @@ rand = "0.9.0"
 half = "2.4.1"
 nalgebra = "0.33.2"
 rayon = "1.10"
+anyhow = "1.0.95"
+rand_distr = "0.5.0"
+serde = { version = "1.0.217", features = ["derive"] }
+clap = { version = "4.5.29", features = ["derive"] }
 
 [dev-dependencies]
 criterion = { version = "0.5", features = ["html_reports"] }
 
 [[bench]]
-name = "my_benchmarks"
+name = "main"
 harness = false
@@ -1,9 +1,11 @@
 # Variables
 PKG = github.com/habedi/vq
-BINARY_NAME = $(or $(PROJ_BINARY), $(notdir $(PKG)))
+BINARY_NAME = $(or $(PROJ_BINARY), $(notdir $(PKG)-examples))
 BINARY = target/release/$(BINARY_NAME)
 PATH := /snap/bin:$(PATH)
 CARGO_TERM_COLOR = always
+export RUST_BACKTRACE = 1
+export RUST_LOG = info
 DEBUG_VQ = 0
 
 # Default target
@@ -16,64 +18,77 @@ help: ## Show this help message
 .PHONY: format
 format: ## Format Rust files
 	@echo "Formatting Rust files..."
-	cargo fmt
+	@cargo fmt
 
 .PHONY: test
 test: format ## Run tests
 	@echo "Running tests..."
-	DEBUG_VQ=$(DEBUG_VQ) cargo test -- --nocapture
+	@DEBUG_VQ=$(DEBUG_VQ) cargo test -- --nocapture
 
 .PHONY: coverage
 coverage: format ## Generate test coverage report
 	@echo "Generating test coverage report..."
-	DEBUG_VQ=$(DEBUG_VQ) cargo tarpaulin --out Xml --out Html
+	@DEBUG_VQ=$(DEBUG_VQ) cargo tarpaulin --out Xml --out Html
 
 .PHONY: build
 build: format ## Build the binary for the current platform
 	@echo "Building the project..."
-	DEBUG_VQ=$(DEBUG_VQ) cargo build --release
+	@DEBUG_VQ=$(DEBUG_VQ) cargo build --release
 
 .PHONY: run
 run: build ## Build and run the binary
 	@echo "Running the $(BINARY) binary..."
-	DEBUG_VQ=$(DEBUG_VQ) ./$(BINARY)
+	@DEBUG_VQ=$(DEBUG_VQ) ./$(BINARY)
 
 .PHONY: clean
 clean: ## Remove generated and temporary files
 	@echo "Cleaning up..."
-	cargo clean
+	@cargo clean
+	@rm -f benchmark_results.csv
+	@rm -f eval_*.csv
 
 .PHONY: install-snap
 install-snap: ## Install a few dependencies using Snapcraft
 	@echo "Installing the snap package..."
-	sudo apt-get update
-	sudo apt-get install -y snapd
-	sudo snap refresh
-	sudo snap install rustup --classic
+	@sudo apt-get update
+	@sudo apt-get install -y snapd
+	@sudo snap refresh
+	@sudo snap install rustup --classic
 
 .PHONY: install-deps
 install-deps: install-snap ## Install development dependencies
 	@echo "Installing dependencies..."
-	rustup component add rustfmt clippy
-	cargo install cargo-tarpaulin
-	cargo install cargo-audit
+	@rustup component add rustfmt clippy
+	@cargo install cargo-tarpaulin
+	@cargo install cargo-audit
 
 .PHONY: lint
 lint: format ## Run linters on Rust files
 	@echo "Linting Rust files..."
-	DEBUG_VQ=$(DEBUG_VQ) cargo clippy -- -D warnings
+	@DEBUG_VQ=$(DEBUG_VQ) cargo clippy -- -D warnings
 
 .PHONY: publish
 publish: ## Publish the package to crates.io (requires CARGO_REGISTRY_TOKEN to be set)
 	@echo "Publishing the package to Cargo registry..."
-	cargo publish --token $(CARGO_REGISTRY_TOKEN)
+	@cargo publish --token $(CARGO_REGISTRY_TOKEN)
 
 .PHONY: bench
 bench: ## Run benchmarks
 	@echo "Running benchmarks..."
-	DEBUG_VQ=$(DEBUG_VQ) cargo bench
+	@DEBUG_VQ=$(DEBUG_VQ) cargo bench
 
-.PHONY: audit
-audit: ## Run security audit on Rust dependencies
-	@echo "Running security audit..."
-	cargo audit
+.PHONY: eval
+eval: ## Evaluate an implementation (the ALG should be the algorithm name, e.g., bq, sq, pq, opq, tsvq, rvq)
+	@echo && if [ -z "$(ALG)" ]; then echo "Please provide the ALG argument"; exit 1; fi
+	@echo "Evaluating implementation with argument: $(ALG)"
+	@cargo run --release --bin eval -- --eval $(ALG)
+
+.PHONY: eval-all
+eval-all: ## Evaluate all the implementations (bq, sq, pq, opq, tsvq, rvq)
+	@echo "Evaluating all implementations..."
+	@cargo run --release --bin eval -- --eval bq
+	@cargo run --release --bin eval -- --eval sq
+	@cargo run --release --bin eval -- --eval pq
+	@cargo run --release --bin eval -- --eval opq
+	@cargo run --release --bin eval -- --eval tsvq
+	@cargo run --release --bin eval -- --eval rvq
@@ -6,53 +6,56 @@
 [<img alt="Crates.io" src="https://img.shields.io/crates/v/vq.svg?style=for-the-badge&color=fc8d62&logo=rust" height="20">](https://crates.io/crates/vq)
 [<img alt="Docs.rs" src="https://img.shields.io/badge/docs.rs-vq-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs" height="20">](https://docs.rs/vq)
 [<img alt="Downloads" src="https://img.shields.io/crates/d/vq?style=for-the-badge&labelColor=555555&logo=rust" height="20">](https://crates.io/crates/vq)
+<br>
 [<img alt="Docs" src="https://img.shields.io/badge/docs-latest-3776ab?style=for-the-badge&labelColor=555555&logo=readthedocs" height="20">](docs)
 [<img alt="License" src="https://img.shields.io/badge/license-MIT%2FApache--2.0-007ec6?style=for-the-badge&labelColor=555555&logo=open-source-initiative" height="20">](https://github.com/habedi/vq)
 
-Vq (**v**[ector] **q**[uantiztion]) is a Rust library that implements several
+Vq (**v**[ector] **q**[uantizer]) is a Rust library that implements several
 popular [vector quantization](https://en.wikipedia.org/wiki/Vector_quantization) algorithms including binary, scalar,
 and product quantization algorithms.
-It provides a simple, efficient API for data compression that help reduce memory usage and computational overhead.
+It provides a simple, efficient API for data compression that helps reduce memory usage and computational overhead.
 
 ## Features
 
 - Implemented Algorithms:
-    - [**Binary Quantization (BQ)**](src/bq.rs)
-    - [**Scalar Quantization (SQ)**](src/sq.rs)
-    - [**Product Quantization (PQ)**](https://ieeexplore.ieee.org/document/5432202)
-    - [**Optimized Product Quantization (OPQ)**](https://ieeexplore.ieee.org/document/6619223)
-    - [**Tree-structured Vector Quantization (TSVQ)**](https://ieeexplore.ieee.org/document/515493)
-    - [**Residual Vector Quantization (RVQ)**](https://pmc.ncbi.nlm.nih.gov/articles/PMC3231071/)
+    - [Binary Quantization (BQ)](src/bq.rs)
+    - [Scalar Quantization (SQ)](src/sq.rs)
+    - [Product Quantization (PQ)](https://ieeexplore.ieee.org/document/5432202)
+    - [Optimized Product Quantization (OPQ)](https://ieeexplore.ieee.org/document/6619223)
+    - [Tree-structured Vector Quantization (TSVQ)](https://ieeexplore.ieee.org/document/515493)
+    - [Residual Vector Quantization (RVQ)](https://pmc.ncbi.nlm.nih.gov/articles/PMC3231071/)
 
 - Parallelized vector operations for large vectors using [Rayon](https://crates.io/crates/rayon).
-- Flexible quantization algorithm implementations that support custom distance functions (e.g., Euclidean, Cosine,
-  Chebyshev, etc.).
-- Support for quantizing vectors of `f32` to `f16` (using [half](https://github.com/starkat99/half-rs/tree/main/src)) or `u8` data types.
-- Simple and intuitive API for all quantization algorithms.
+- Flexible quantization algorithm implementations that support using various distance metrics such as Euclidean, Cosine,
+  and Manhattan distances.
+- Support for quantizing vectors of `f32` to `f16` (using [half](https://crates.io/crates/half)) or `u8` data types.
+- Simple, intuitive, and uniform API for all quantization algorithms.
 
 ## Installation
 
 ```bash
 cargo add vq
 ```
 
+*Vq requires Rust 1.80 or later.*
+
 ## Documentation
 
 Find the latest documentation [here](docs) or on [docs.rs](https://docs.rs/vq).
 
-Check out the [tests](tests/) directory for detailed examples of using Vq.
+Check out [examples.rs](src/bin/examples.rs) and the [tests](tests/) directory for detailed examples of using Vq.
 
 ### Quick Example
 
-Here's a simple example using the scalar quantization:
+Here's a simple example using the SQ algorithm to quantize a vector:
 
 ```rust
 use vq::sq::ScalarQuantizer;
 use vq::vector::Vector;
 
 fn main() {
     // Create a scalar quantizer for values in the range [0.0, 1.0] with 256 levels.
-    let quantizer = ScalarQuantizer::new(0.0, 1.0, 256);
+    let quantizer = ScalarQuantizer::fit(0.0, 1.0, 256);
 
     // Create an input vector.
     let input = Vector::new(vec![0.1, 0.5, -0.8, -0.3, 0.9]);
@@ -64,9 +67,23 @@ fn main() {
 }
 ```
 
+## Performance
+
+Check out the [notebooks](notebooks/) directory for information on how to evaluate the performance of the implemented
+algorithms.
+Additionally, see the contents of the [src/bin](src/bin/) directory for the scripts used for the evaluation.
+
+> [!TIP]
+> On a ThinkPad T14 laptop with an Intel i7-1355U CPU and 32GB RAM, the performance of the PQ algorithm for
+> quantizing one million vectors of 128 dimensions (into 16 subspaces with 256 centroids per subspace) is as follows:
+>   - Training Time: 232.5 seconds
+>   - Quantization Time: 34.1 seconds
+>   - Reconstruction Error: 0.02
+>   - Recall@10: 0.19
+
 ## Contributing
 
-Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to contribute.
+Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details on contributing.
 
 ## License
 
 
@@ -0,0 +1,106 @@
+#[path = "utils.rs"]
+mod utils;
+
+use criterion::{black_box, criterion_group, Criterion};
+use rayon::prelude::*;
+use utils::{BENCH_TIMEOUT, NUM_VECTORS};
+use vq::bq::BinaryQuantizer;
+use vq::vector::{Vector, PARALLEL_THRESHOLD};
+
+/// Times `BinaryQuantizer::quantize` on one vector of `PARALLEL_THRESHOLD / 2` elements,
+/// a length intended (per the benchmark's name) to keep quantization on its sequential path.
+/// The `_c` handle is unused; a locally built `Criterion` runs the measurement.
+pub fn bench_quantize_sequential(_c: &mut Criterion) {
+    let len = PARALLEL_THRESHOLD / 2;
+    let denom = len as f32;
+    // Evenly spaced values in [0, 1).
+    let samples: Vec<f32> = (0..len).map(|idx| (idx as f32) / denom).collect();
+    let input = Vector::new(samples);
+    let bq = BinaryQuantizer::fit(0.5, 0, 1);
+
+    let mut runner = Criterion::default().measurement_time(BENCH_TIMEOUT);
+    runner.bench_function("quantize_sequential", |bencher| {
+        bencher.iter(|| black_box(bq.quantize(black_box(&input))))
+    });
+}
+
+/// Times `BinaryQuantizer::quantize` on one vector of `PARALLEL_THRESHOLD + 1000` elements,
+/// a length intended (per the benchmark's name) to push quantization onto its parallel path.
+/// The `_c` handle is unused; a locally built `Criterion` runs the measurement.
+pub fn bench_quantize_parallel(_c: &mut Criterion) {
+    let len = PARALLEL_THRESHOLD + 1000;
+    let denom = len as f32;
+    // Evenly spaced values in [0, 1).
+    let samples: Vec<f32> = (0..len).map(|idx| (idx as f32) / denom).collect();
+    let input = Vector::new(samples);
+    let bq = BinaryQuantizer::fit(0.5, 0, 1);
+
+    let mut runner = Criterion::default().measurement_time(BENCH_TIMEOUT);
+    runner.bench_function("quantize_parallel", |bencher| {
+        bencher.iter(|| black_box(bq.quantize(black_box(&input))))
+    });
+}
+
+/// Times quantizing `NUM_VECTORS` vectors one after another with a plain iterator.
+/// Each vector has `PARALLEL_THRESHOLD / 2` elements, a length intended to keep every
+/// individual `quantize` call on its sequential path.
+/// The `_c` handle is unused; a locally built `Criterion` runs the measurement.
+pub fn bench_quantize_multiple_vectors_sequential(_c: &mut Criterion) {
+    let len = PARALLEL_THRESHOLD / 2;
+    let denom = len as f32;
+    // Every vector holds the same evenly spaced values in [0, 1).
+    let inputs: Vec<Vector<f32>> = (0..NUM_VECTORS)
+        .map(|_| Vector::new((0..len).map(|idx| (idx as f32) / denom).collect::<Vec<f32>>()))
+        .collect();
+
+    let bq = BinaryQuantizer::fit(0.5, 0, 1);
+
+    // One timed iteration quantizes the whole batch and collects the outputs.
+    let mut runner = Criterion::default().measurement_time(BENCH_TIMEOUT);
+    runner.bench_function("quantize_multiple_vectors_sequential", |bencher| {
+        bencher.iter(|| {
+            let outputs: Vec<Vector<u8>> = inputs
+                .iter()
+                .map(|input| bq.quantize(black_box(input)))
+                .collect();
+            black_box(outputs);
+        })
+    });
+}
+
+/// Times quantizing `NUM_VECTORS` vectors concurrently through Rayon's `par_iter`.
+/// Each vector has `PARALLEL_THRESHOLD + 100` elements, a length intended to push every
+/// individual `quantize` call onto its parallel path, so parallelism is nested:
+/// an outer parallel loop over vectors and (presumably) an inner one per vector.
+/// The `_c` handle is unused; a locally built `Criterion` runs the measurement.
+pub fn bench_quantize_multiple_vectors_parallel_outer(_c: &mut Criterion) {
+    let len = PARALLEL_THRESHOLD + 100;
+    let denom = len as f32;
+    // Every vector holds the same evenly spaced values in [0, 1).
+    let inputs: Vec<Vector<f32>> = (0..NUM_VECTORS)
+        .map(|_| Vector::new((0..len).map(|idx| (idx as f32) / denom).collect::<Vec<f32>>()))
+        .collect();
+
+    let bq = BinaryQuantizer::fit(0.5, 0, 1);
+
+    // One timed iteration quantizes the whole batch and collects the outputs.
+    let mut runner = Criterion::default().measurement_time(BENCH_TIMEOUT);
+    runner.bench_function("quantize_multiple_vectors_parallel_outer", |bencher| {
+        bencher.iter(|| {
+            let outputs: Vec<Vector<u8>> = inputs
+                .par_iter()
+                .map(|input| bq.quantize(black_box(input)))
+                .collect();
+            black_box(outputs);
+        })
+    });
+}
+
+criterion_group!(
+    benches,
+    bench_quantize_sequential,
+    bench_quantize_parallel,
+    bench_quantize_multiple_vectors_sequential,
+    bench_quantize_multiple_vectors_parallel_outer
+);
+//criterion_main!(benches);