|
| 1 | +// Copyright 2025 The Cockroach Authors. |
| 2 | +// |
| 3 | +// Use of this software is governed by the CockroachDB Software License |
| 4 | +// included in the /LICENSE file. |
| 5 | + |
| 6 | +package backfill_test |
| 7 | + |
| 8 | +import ( |
| 9 | + "context" |
| 10 | + "testing" |
| 11 | + |
| 12 | + "github.com/cockroachdb/cockroach/pkg/base" |
| 13 | + "github.com/cockroachdb/cockroach/pkg/kv" |
| 14 | + "github.com/cockroachdb/cockroach/pkg/sql/execinfra" |
| 15 | + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" |
| 16 | + "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" |
| 17 | + "github.com/cockroachdb/cockroach/pkg/util/leaktest" |
| 18 | + "github.com/cockroachdb/cockroach/pkg/util/log" |
| 19 | + "github.com/cockroachdb/cockroach/pkg/util/syncutil" |
| 20 | + "github.com/stretchr/testify/require" |
| 21 | +) |
| 22 | + |
| 23 | +func TestVectorColumnAndIndexBackfill(t *testing.T) { |
| 24 | + defer leaktest.AfterTest(t)() |
| 25 | + defer log.Scope(t).Close(t) |
| 26 | + |
| 27 | + // Track whether we've injected an error |
| 28 | + var errorState struct { |
| 29 | + mu syncutil.Mutex |
| 30 | + hasErrored bool |
| 31 | + } |
| 32 | + |
| 33 | + ctx := context.Background() |
| 34 | + srv, db, _ := serverutils.StartServer(t, base.TestServerArgs{ |
| 35 | + Knobs: base.TestingKnobs{ |
| 36 | + DistSQL: &execinfra.TestingKnobs{ |
| 37 | + // Inject a retriable error on the first call to the vector index backfiller. |
| 38 | + RunDuringReencodeVectorIndexEntry: func(txn *kv.Txn) error { |
| 39 | + errorState.mu.Lock() |
| 40 | + defer errorState.mu.Unlock() |
| 41 | + if !errorState.hasErrored { |
| 42 | + errorState.hasErrored = true |
| 43 | + return txn.GenerateForcedRetryableErr(ctx, "forcing a retry error") |
| 44 | + } |
| 45 | + return nil |
| 46 | + }, |
| 47 | + }, |
| 48 | + }, |
| 49 | + }) |
| 50 | + defer srv.Stopper().Stop(ctx) |
| 51 | + sqlDB := sqlutils.MakeSQLRunner(db) |
| 52 | + |
| 53 | + // Create a table with a vector column |
| 54 | + sqlDB.Exec(t, ` |
| 55 | + CREATE TABLE vectors ( |
| 56 | + id INT PRIMARY KEY, |
| 57 | + vec VECTOR(3) |
| 58 | + ) |
| 59 | + `) |
| 60 | + |
| 61 | + // Insert 200 rows with random vector data |
| 62 | + sqlDB.Exec(t, ` |
| 63 | + INSERT INTO vectors (id, vec) |
| 64 | + SELECT |
| 65 | + generate_series(1, 200) as id, |
| 66 | + ARRAY[random(), random(), random()]::vector(3) as vec |
| 67 | + `) |
| 68 | + |
| 69 | + // Create a vector index on the vector column |
| 70 | + sqlDB.Exec(t, ` |
| 71 | + CREATE VECTOR INDEX vec_idx ON vectors (vec) |
| 72 | + `) |
| 73 | + |
| 74 | + // Test vector similarity search and see that the backfiller got at |
| 75 | + // least some of the vectors in there. |
| 76 | + var matchCount int |
| 77 | + sqlDB.QueryRow(t, ` |
| 78 | + SELECT count(*) |
| 79 | + FROM ( |
| 80 | + SELECT id |
| 81 | + FROM vectors@vec_idx |
| 82 | + ORDER BY vec <-> ARRAY[0.5, 0.5, 0.5]::vector(3) |
| 83 | + LIMIT 200 |
| 84 | + ) |
| 85 | + `).Scan(&matchCount) |
| 86 | + // There's some non-determinism here where we may not find all 200 vectors. |
| 87 | + // I chose 190 as a low water mark to prevent test flakes, but it should really |
| 88 | + // be 200 in most cases. |
| 89 | + require.Greater(t, matchCount, 190, "Expected to find at least 190 similar vectors") |
| 90 | +} |
0 commit comments