forked from pgvector/pgvector
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhnsw.h
511 lines (423 loc) · 14.6 KB
/
hnsw.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
#ifndef HNSW_H
#define HNSW_H
#include "postgres.h"
#include "access/genam.h"
#include "access/parallel.h"
#include "lib/pairingheap.h"
#include "nodes/execnodes.h"
#include "port.h" /* for random() */
#include "utils/relptr.h"
#include "utils/sampling.h"
#include "vector.h"
#define HNSW_MAX_DIM 2000
#define HNSW_MAX_NNZ 1000
/* Support functions */
#define HNSW_DISTANCE_PROC 1
#define HNSW_NORM_PROC 2
#define HNSW_TYPE_INFO_PROC 3
#define HNSW_VERSION 1
#define HNSW_MAGIC_NUMBER 0xA953A953
#define HNSW_PAGE_ID 0xFF90
/* Preserved page numbers */
#define HNSW_METAPAGE_BLKNO 0
#define HNSW_HEAD_BLKNO 1 /* first element page */
/* Must correspond to page numbers since page lock is used */
#define HNSW_UPDATE_LOCK 0
#define HNSW_SCAN_LOCK 1
/* HNSW parameters */
#define HNSW_DEFAULT_M 16
#define HNSW_MIN_M 2
#define HNSW_MAX_M 100
#define HNSW_DEFAULT_EF_CONSTRUCTION 64
#define HNSW_MIN_EF_CONSTRUCTION 4
#define HNSW_MAX_EF_CONSTRUCTION 1000
#define HNSW_DEFAULT_EF_SEARCH 40
#define HNSW_MIN_EF_SEARCH 1
#define HNSW_MAX_EF_SEARCH 1000
/* Tuple types */
#define HNSW_ELEMENT_TUPLE_TYPE 1
#define HNSW_NEIGHBOR_TUPLE_TYPE 2
/* Make graph robust against non-HOT updates */
#define HNSW_HEAPTIDS 10
#define HNSW_UPDATE_ENTRY_GREATER 1
#define HNSW_UPDATE_ENTRY_ALWAYS 2
/* Build phases */
/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 */
#define PROGRESS_HNSW_PHASE_LOAD 2
#define HNSW_MAX_SIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData)) - sizeof(ItemIdData))
#define HNSW_TUPLE_ALLOC_SIZE BLCKSZ
#define HNSW_ELEMENT_TUPLE_SIZE(size) MAXALIGN(offsetof(HnswElementTupleData, data) + (size))
#define HNSW_NEIGHBOR_TUPLE_SIZE(level, m) MAXALIGN(offsetof(HnswNeighborTupleData, indextids) + ((level) + 2) * (m) * sizeof(ItemPointerData))
#define HNSW_NEIGHBOR_ARRAY_SIZE(lm) (offsetof(HnswNeighborArray, items) + sizeof(HnswCandidate) * (lm))
#define HnswPageGetOpaque(page) ((HnswPageOpaque) PageGetSpecialPointer(page))
#define HnswPageGetMeta(page) ((HnswMetaPageData *) PageGetContents(page))
#if PG_VERSION_NUM >= 150000
#define RandomDouble() pg_prng_double(&pg_global_prng_state)
#define SeedRandom(seed) pg_prng_seed(&pg_global_prng_state, seed)
#else
#define RandomDouble() (((double) random()) / MAX_RANDOM_VALUE)
#define SeedRandom(seed) srandom(seed)
#endif
#define HnswIsElementTuple(tup) ((tup)->type == HNSW_ELEMENT_TUPLE_TYPE)
#define HnswIsNeighborTuple(tup) ((tup)->type == HNSW_NEIGHBOR_TUPLE_TYPE)
/* 2 * M connections for ground layer */
#define HnswGetLayerM(m, layer) (layer == 0 ? (m) * 2 : (m))
/* Optimal ML from paper */
#define HnswGetMl(m) (1 / log(m))
/* Ensure fits on page and in uint8 */
#define HnswGetMaxLevel(m) Min(((BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData)) - offsetof(HnswNeighborTupleData, indextids) - sizeof(ItemIdData)) / (sizeof(ItemPointerData)) / (m)) - 2, 255)
#define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr)
#define HnswGetSearchCandidateConst(membername, ptr) pairingheap_const_container(HnswSearchCandidate, membername, ptr)
#define HnswGetValue(base, element) PointerGetDatum(HnswPtrAccess(base, (element)->value))
#if PG_VERSION_NUM < 140005
#define relptr_offset(rp) ((rp).relptr_off - 1)
#endif
/* Pointer macros */
#define HnswPtrAccess(base, hp) ((base) == NULL ? (hp).ptr : relptr_access(base, (hp).relptr))
#define HnswPtrStore(base, hp, value) ((base) == NULL ? (void) ((hp).ptr = (value)) : (void) relptr_store(base, (hp).relptr, value))
#define HnswPtrIsNull(base, hp) ((base) == NULL ? (hp).ptr == NULL : relptr_is_null((hp).relptr))
#define HnswPtrEqual(base, hp1, hp2) ((base) == NULL ? (hp1).ptr == (hp2).ptr : relptr_offset((hp1).relptr) == relptr_offset((hp2).relptr))
/* For code paths dedicated to each type */
#define HnswPtrPointer(hp) (hp).ptr
#define HnswPtrOffset(hp) relptr_offset((hp).relptr)
/* Variables */
extern int hnsw_ef_search;
extern int hnsw_iterative_scan;
extern int hnsw_max_scan_tuples;
extern double hnsw_scan_mem_multiplier;
extern int hnsw_lock_tranche_id;
typedef enum HnswIterativeScanMode
{
HNSW_ITERATIVE_SCAN_OFF,
HNSW_ITERATIVE_SCAN_RELAXED,
HNSW_ITERATIVE_SCAN_STRICT
} HnswIterativeScanMode;
typedef struct HnswElementData HnswElementData;
typedef struct HnswNeighborArray HnswNeighborArray;
#define HnswPtrDeclare(type, relptrtype, ptrtype) \
relptr_declare(type, relptrtype); \
typedef union { type *ptr; relptrtype relptr; } ptrtype
/* Pointers that can be absolute or relative */
/* Use char for DatumPtr so works with Pointer */
HnswPtrDeclare(HnswElementData, HnswElementRelptr, HnswElementPtr);
HnswPtrDeclare(HnswNeighborArray, HnswNeighborArrayRelptr, HnswNeighborArrayPtr);
HnswPtrDeclare(HnswNeighborArrayPtr, HnswNeighborsRelptr, HnswNeighborsPtr);
HnswPtrDeclare(char, DatumRelptr, DatumPtr);
struct HnswElementData
{
HnswElementPtr next;
ItemPointerData heaptids[HNSW_HEAPTIDS];
uint8 heaptidsLength;
uint8 level;
uint8 deleted;
uint8 version;
uint32 hash;
HnswNeighborsPtr neighbors;
BlockNumber blkno;
OffsetNumber offno;
OffsetNumber neighborOffno;
BlockNumber neighborPage;
DatumPtr value;
LWLock lock;
};
typedef HnswElementData * HnswElement;
typedef struct HnswCandidate
{
HnswElementPtr element;
float distance;
bool closer;
} HnswCandidate;
struct HnswNeighborArray
{
int length;
bool closerSet;
HnswCandidate items[FLEXIBLE_ARRAY_MEMBER];
};
typedef struct HnswSearchCandidate
{
pairingheap_node c_node;
pairingheap_node w_node;
HnswElementPtr element;
double distance;
} HnswSearchCandidate;
/* HNSW index options */
typedef struct HnswOptions
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int m; /* number of connections */
int efConstruction; /* size of dynamic candidate list */
} HnswOptions;
typedef struct HnswGraph
{
/* Graph state */
slock_t lock;
HnswElementPtr head;
double indtuples;
/* Entry state */
LWLock entryLock;
LWLock entryWaitLock;
HnswElementPtr entryPoint;
/* Allocations state */
LWLock allocatorLock;
Size memoryUsed;
Size memoryTotal;
/* Flushed state */
LWLock flushLock;
bool flushed;
} HnswGraph;
typedef struct HnswShared
{
/* Immutable state */
Oid heaprelid;
Oid indexrelid;
bool isconcurrent;
/* Worker progress */
ConditionVariable workersdonecv;
/* Mutex for mutable state */
slock_t mutex;
/* Mutable state */
int nparticipantsdone;
double reltuples;
HnswGraph graphData;
} HnswShared;
#define ParallelTableScanFromHnswShared(shared) \
(ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(HnswShared)))
typedef struct HnswLeader
{
ParallelContext *pcxt;
int nparticipanttuplesorts;
HnswShared *hnswshared;
Snapshot snapshot;
char *hnswarea;
} HnswLeader;
typedef struct HnswAllocator
{
void *(*alloc) (Size size, void *state);
void *state;
} HnswAllocator;
typedef struct HnswTypeInfo
{
int maxDimensions;
Datum (*normalize) (PG_FUNCTION_ARGS);
void (*checkValue) (Pointer v);
} HnswTypeInfo;
typedef struct HnswSupport
{
FmgrInfo *procinfo;
FmgrInfo *normprocinfo;
Oid collation;
} HnswSupport;
typedef struct HnswQuery
{
Datum value;
} HnswQuery;
typedef struct HnswBuildState
{
/* Info */
Relation heap;
Relation index;
IndexInfo *indexInfo;
ForkNumber forkNum;
const HnswTypeInfo *typeInfo;
/* Settings */
int dimensions;
int m;
int efConstruction;
/* Statistics */
double indtuples;
double reltuples;
/* Support functions */
HnswSupport support;
/* Variables */
HnswGraph graphData;
HnswGraph *graph;
double ml;
int maxLevel;
/* Memory */
MemoryContext graphCtx;
MemoryContext tmpCtx;
HnswAllocator allocator;
/* Parallel builds */
HnswLeader *hnswleader;
HnswShared *hnswshared;
char *hnswarea;
} HnswBuildState;
typedef struct HnswMetaPageData
{
uint32 magicNumber;
uint32 version;
uint32 dimensions;
uint16 m;
uint16 efConstruction;
BlockNumber entryBlkno;
OffsetNumber entryOffno;
int16 entryLevel;
BlockNumber insertPage;
} HnswMetaPageData;
typedef HnswMetaPageData * HnswMetaPage;
typedef struct HnswPageOpaqueData
{
BlockNumber nextblkno;
uint16 unused;
uint16 page_id; /* for identification of HNSW indexes */
} HnswPageOpaqueData;
typedef HnswPageOpaqueData * HnswPageOpaque;
typedef struct HnswElementTupleData
{
uint8 type;
uint8 level;
uint8 deleted;
uint8 version;
ItemPointerData heaptids[HNSW_HEAPTIDS];
ItemPointerData neighbortid;
uint16 unused;
Vector data;
} HnswElementTupleData;
typedef HnswElementTupleData * HnswElementTuple;
typedef struct HnswNeighborTupleData
{
uint8 type;
uint8 version;
uint16 count;
ItemPointerData indextids[FLEXIBLE_ARRAY_MEMBER];
} HnswNeighborTupleData;
typedef HnswNeighborTupleData * HnswNeighborTuple;
typedef union
{
struct pointerhash_hash *pointers;
struct offsethash_hash *offsets;
struct tidhash_hash *tids;
} visited_hash;
typedef union
{
HnswElement element;
ItemPointerData indextid;
} HnswUnvisited;
typedef struct HnswScanOpaqueData
{
const HnswTypeInfo *typeInfo;
bool first;
List *w;
visited_hash v;
pairingheap *discarded;
HnswQuery q;
int m;
int64 tuples;
double previousDistance;
Size maxMemory;
MemoryContext tmpCtx;
/* Support functions */
HnswSupport support;
} HnswScanOpaqueData;
typedef HnswScanOpaqueData * HnswScanOpaque;
typedef struct HnswVacuumState
{
/* Info */
Relation index;
IndexBulkDeleteResult *stats;
IndexBulkDeleteCallback callback;
void *callback_state;
/* Settings */
int m;
int efConstruction;
/* Support functions */
HnswSupport support;
/* Variables */
struct tidhash_hash *deleted;
BufferAccessStrategy bas;
HnswNeighborTuple ntup;
HnswElementData highestPoint;
/* Memory */
MemoryContext tmpCtx;
} HnswVacuumState;
/* Methods */
int HnswGetM(Relation index);
int HnswGetEfConstruction(Relation index);
FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum);
void HnswInitSupport(HnswSupport * support, Relation index);
Datum HnswNormValue(const HnswTypeInfo * typeInfo, Oid collation, Datum value);
bool HnswCheckNorm(HnswSupport * support, Datum value);
Buffer HnswNewBuffer(Relation index, ForkNumber forkNum);
void HnswInitPage(Buffer buf, Page page);
void HnswInit(void);
List *HnswSearchLayer(char *base, HnswQuery * q, List *ep, int ef, int lc, Relation index, HnswSupport * support, int m, bool inserting, HnswElement skipElement, visited_hash * v, pairingheap **discarded, bool initVisited, int64 *tuples);
HnswElement HnswGetEntryPoint(Relation index);
void HnswGetMetaPageInfo(Relation index, int *m, HnswElement * entryPoint);
void *HnswAlloc(HnswAllocator * allocator, Size size);
HnswElement HnswInitElement(char *base, ItemPointer tid, int m, double ml, int maxLevel, HnswAllocator * alloc);
HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno);
void HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint, Relation index, HnswSupport * support, int m, int efConstruction, bool existing);
HnswSearchCandidate *HnswEntryCandidate(char *base, HnswElement em, HnswQuery * q, Relation rel, HnswSupport * support, bool loadVec);
void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum, bool building);
void HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, int m);
void HnswAddHeapTid(HnswElement element, ItemPointer heaptid);
HnswNeighborArray *HnswInitNeighborArray(int lm, HnswAllocator * allocator);
void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator * alloc);
bool HnswInsertTupleOnDisk(Relation index, HnswSupport * support, Datum value, ItemPointer heaptid, bool building);
void HnswUpdateNeighborsOnDisk(Relation index, HnswSupport * support, HnswElement e, int m, bool checkExisting, bool building);
void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec);
void HnswLoadElement(HnswElement element, double *distance, HnswQuery * q, Relation index, HnswSupport * support, bool loadVec, double *maxDistance);
bool HnswFormIndexValue(Datum *out, Datum *values, bool *isnull, const HnswTypeInfo * typeInfo, HnswSupport * support);
void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element);
void HnswUpdateConnection(char *base, HnswNeighborArray * neighbors, HnswElement newElement, float distance, int lm, int *updateIdx, Relation index, HnswSupport * support);
bool HnswLoadNeighborTids(HnswElement element, ItemPointerData *indextids, Relation index, int m, int lm, int lc);
void HnswInitLockTranche(void);
const HnswTypeInfo *HnswGetTypeInfo(Relation index);
PGDLLEXPORT void HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc);
/* Index access methods */
IndexBuildResult *hnswbuild(Relation heap, Relation index, IndexInfo *indexInfo);
void hnswbuildempty(Relation index);
bool hnswinsert(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heap, IndexUniqueCheck checkUnique
#if PG_VERSION_NUM >= 140000
,bool indexUnchanged
#endif
,IndexInfo *indexInfo
);
IndexBulkDeleteResult *hnswbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state);
IndexBulkDeleteResult *hnswvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats);
IndexScanDesc hnswbeginscan(Relation index, int nkeys, int norderbys);
void hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys);
bool hnswgettuple(IndexScanDesc scan, ScanDirection dir);
void hnswendscan(IndexScanDesc scan);
static inline HnswNeighborArray *
HnswGetNeighbors(char *base, HnswElement element, int lc)
{
HnswNeighborArrayPtr *neighborList = HnswPtrAccess(base, element->neighbors);
Assert(element->level >= lc);
return HnswPtrAccess(base, neighborList[lc]);
}
/* Hash tables */
typedef struct TidHashEntry
{
ItemPointerData tid;
char status;
} TidHashEntry;
#define SH_PREFIX tidhash
#define SH_ELEMENT_TYPE TidHashEntry
#define SH_KEY_TYPE ItemPointerData
#define SH_SCOPE extern
#define SH_DECLARE
#include "lib/simplehash.h"
typedef struct PointerHashEntry
{
uintptr_t ptr;
char status;
} PointerHashEntry;
#define SH_PREFIX pointerhash
#define SH_ELEMENT_TYPE PointerHashEntry
#define SH_KEY_TYPE uintptr_t
#define SH_SCOPE extern
#define SH_DECLARE
#include "lib/simplehash.h"
typedef struct OffsetHashEntry
{
Size offset;
char status;
} OffsetHashEntry;
#define SH_PREFIX offsethash
#define SH_ELEMENT_TYPE OffsetHashEntry
#define SH_KEY_TYPE Size
#define SH_SCOPE extern
#define SH_DECLARE
#include "lib/simplehash.h"
#endif