|
| 1 | +import { describe, expect, it } from "vitest"; |
| 2 | +import { VectorIndex } from "turboquant-js"; |
| 3 | + |
/**
 * Integration tests for `VectorIndex`: insertion, search ranking,
 * memory-usage reporting, and removal by id.
 */
describe("VectorIndex integration", () => {
  /** Dimensionality of every vector and index created in this suite. */
  const DIMENSION = 16;
  /** Quantization bit-width used by the tests that do not vary it. */
  const DEFAULT_BITS = 3;

  /**
   * Builds a deterministic pseudo-random vector scaled to unit L2 norm.
   *
   * The same `seed` always yields the same vector, which is what lets the
   * search test re-create a stored vector and use it as a "self" query.
   *
   * @param seed - Initial state of the generator.
   * @param dimension - Number of components; defaults to the suite's `DIMENSION`.
   * @returns A vector of `dimension` components with unit L2 norm.
   */
  function randomVector(seed: number, dimension: number = DIMENSION): number[] {
    const components: number[] = [];
    let state = seed;
    for (let i = 0; i < dimension; i++) {
      // LCG-style update. The product can exceed 2^53, so it is rounded to a
      // double before the mask is applied; the sequence is therefore not an
      // exact integer LCG, but it is fully deterministic. The arithmetic is
      // kept as-is so the generated fixtures do not change.
      state = (state * 1103515245 + 12345) & 0x7fffffff;
      // The mask keeps `state` in [0, 0x7fffffff]; map that onto [-1, 1].
      components.push((state / 0x7fffffff) * 2 - 1);
    }
    // Normalize to unit length.
    const norm = Math.sqrt(components.reduce((sum, x) => sum + x * x, 0));
    return components.map((x) => x / norm);
  }

  it("should add, search, and return ranked results", () => {
    const index = new VectorIndex({ dimension: DIMENSION, bits: DEFAULT_BITS, metric: "cosine" });

    // doc-i is generated from seed i + 1, so doc-0 is randomVector(1).
    const vectors = Array.from({ length: 20 }, (_, i) => ({
      id: `doc-${i}`,
      vector: randomVector(i + 1),
    }));

    index.addBatch(vectors);
    expect(index.size).toBe(20);

    // Query with the exact vector that was stored as doc-0.
    const results = index.search(randomVector(1), 5);
    expect(results).toHaveLength(5);
    expect(results[0].id).toBe("doc-0"); // Self-query should rank first
    expect(results[0].score).toBeGreaterThan(0.5);

    // Scores should be in descending order
    for (let i = 1; i < results.length; i++) {
      expect(results[i].score).toBeLessThanOrEqual(results[i - 1].score);
    }
  });

  it("should report memory usage with compression", () => {
    const index = new VectorIndex({ dimension: DIMENSION, bits: DEFAULT_BITS, metric: "cosine" });
    for (let i = 0; i < 10; i++) {
      index.add(i, randomVector(i));
    }

    const mem = index.memoryUsage;
    expect(mem.compressionRatio).toBeGreaterThan(1);
    expect(mem.totalBits).toBeGreaterThan(0);
    expect(mem.bitsPerVector).toBeGreaterThan(0);
  });

  it("should produce different compression ratios at different bit-widths", () => {
    const vectors = Array.from({ length: 10 }, (_, i) => randomVector(i));

    // One ratio per bit-width, in the same order as the bit-width list.
    const ratios: number[] = [];
    for (const bits of [2, 3, 4]) {
      const index = new VectorIndex({ dimension: DIMENSION, bits, metric: "cosine" });
      vectors.forEach((v, i) => index.add(i, v));
      ratios.push(index.memoryUsage.compressionRatio);
    }

    // Higher bits = lower compression ratio
    expect(ratios[0]).toBeGreaterThan(ratios[1]);
    expect(ratios[1]).toBeGreaterThan(ratios[2]);
  });

  it("should remove vectors by id", () => {
    const index = new VectorIndex({ dimension: DIMENSION, bits: DEFAULT_BITS, metric: "cosine" });
    index.add("a", randomVector(1));
    index.add("b", randomVector(2));
    index.add("c", randomVector(3));
    expect(index.size).toBe(3);

    const removed = index.remove("b");
    expect(removed).toBe(true);
    expect(index.size).toBe(2);

    // A removed vector must not be returned by search, even when the query
    // is the exact vector that was stored under the removed id.
    const remainingIds = index.search(randomVector(2), 2).map((r) => r.id);
    expect(remainingIds).not.toContain("b");

    const notFound = index.remove("nonexistent");
    expect(notFound).toBe(false);
  });
});
| 83 | + |
/**
 * Compares the quantized `VectorIndex` search against a brute-force exact
 * search over the same vectors and checks that the top results overlap.
 */
describe("Exact vs quantized search comparison", () => {
  /** Dimensionality of every vector and of the index in this suite. */
  const DIMENSION = 32;
  /** Number of vectors inserted into the index. */
  const NUM_VECTORS = 50;
  /** Number of results requested from both the exact and quantized search. */
  const TOP_K = 10;
  /** Minimum number of ids the two top-k lists must share (30% of TOP_K). */
  const MIN_OVERLAP = 3;

  /**
   * Builds a deterministic pseudo-random vector scaled to unit L2 norm.
   *
   * @param seed - Initial state of the generator.
   * @param dimension - Number of components; defaults to the suite's `DIMENSION`.
   * @returns A vector of `dimension` components with unit L2 norm.
   */
  function randomVector(seed: number, dimension: number = DIMENSION): number[] {
    const components: number[] = [];
    let state = seed;
    for (let i = 0; i < dimension; i++) {
      // LCG-style update. The product can exceed 2^53 and is rounded to a
      // double before masking, so this is deterministic but not an exact
      // integer LCG. Kept unchanged so the fixtures stay the same.
      state = (state * 1103515245 + 12345) & 0x7fffffff;
      // The mask keeps `state` in [0, 0x7fffffff]; map that onto [-1, 1].
      components.push((state / 0x7fffffff) * 2 - 1);
    }
    const norm = Math.sqrt(components.reduce((sum, x) => sum + x * x, 0));
    return components.map((x) => x / norm);
  }

  /**
   * Brute-force reference search: scores every vector against the query and
   * returns the `topK` best, highest score first.
   *
   * The score is the raw dot product, which equals cosine similarity only
   * when both operands have unit norm. Every vector passed in by this suite
   * comes from `randomVector`, which normalizes its output.
   *
   * @param query - Query vector (expected to be unit length).
   * @param vectors - Candidate vectors, each at least as long as `query`.
   * @param topK - Maximum number of results to return.
   * @returns Up to `topK` `{ id, score }` pairs, where `id` is the candidate's
   *   position in `vectors`, sorted by descending score.
   */
  function exactCosineSearch(
    query: number[],
    vectors: number[][],
    topK: number,
  ): Array<{ id: number; score: number }> {
    const scores = vectors.map((v, i) => {
      let dot = 0;
      // Iterate over the query's own length instead of a suite-level constant.
      for (let j = 0; j < query.length; j++) dot += query[j] * v[j];
      return { id: i, score: dot };
    });
    // `scores` is a fresh array created by map(), so sorting in place is safe.
    scores.sort((a, b) => b.score - a.score);
    return scores.slice(0, topK);
  }

  it("should have meaningful overlap between exact and quantized results", () => {
    const index = new VectorIndex({ dimension: DIMENSION, bits: 3, metric: "cosine" });
    const vectors = Array.from({ length: NUM_VECTORS }, (_, i) => randomVector(i + 100));
    // Ids are the array positions, matching the ids exactCosineSearch returns.
    vectors.forEach((v, i) => index.add(i, v));

    const query = randomVector(999);
    const quantizedResults = index.search(query, TOP_K);
    const exactResults = exactCosineSearch(query, vectors, TOP_K);

    // Typed as Set<unknown> so ids from the index can be looked up without a
    // type assertion; membership is still decided by strict value equality.
    const exactIds = new Set<unknown>(exactResults.map((r) => r.id));
    const overlap = quantizedResults.filter((r) => exactIds.has(r.id)).length;

    // At 3-bit quantization, expect at least 30% overlap in top-10
    expect(overlap).toBeGreaterThanOrEqual(MIN_OVERLAP);
  });
});
0 commit comments