Consistently use "superuser" instead of "super user"
[pgsql.git] / src / include / access / gist_private.h
blob553d364e2d1e355bd071d446c77bd16dc7442c98
1 /*-------------------------------------------------------------------------
3 * gist_private.h
4 * private declarations for GiST -- declarations related to the
5 * internal implementation of GiST, not the public API
7 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/include/access/gist_private.h
12 *-------------------------------------------------------------------------
14 #ifndef GIST_PRIVATE_H
15 #define GIST_PRIVATE_H
17 #include "access/amapi.h"
18 #include "access/gist.h"
19 #include "access/itup.h"
20 #include "lib/pairingheap.h"
21 #include "storage/bufmgr.h"
22 #include "storage/buffile.h"
23 #include "utils/hsearch.h"
24 #include "access/genam.h"
27 * Maximum number of "halves" a page can be split into in one operation.
28 * Typically a split produces 2 halves, but can be more if keys have very
29 * different lengths, or when inserting multiple keys in one operation (as
30 * when inserting downlinks to an internal node). There is no theoretical
31 * limit on this, but in practice if you get more than a handful page halves
32 * in one split, there's something wrong with the opclass implementation.
33 * GIST_MAX_SPLIT_PAGES is an arbitrary limit on that, used to size some
34 * local arrays used during split. Note that there is also a limit on the
35 * number of buffers that can be held locked at a time, MAX_SIMUL_LWLOCKS,
36 * so if you raise this higher than that limit, you'll just get a different
37 * error.
39 #define GIST_MAX_SPLIT_PAGES 75
41 /* Buffer lock modes */
42 #define GIST_SHARE BUFFER_LOCK_SHARE
43 #define GIST_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE
44 #define GIST_UNLOCK BUFFER_LOCK_UNLOCK
46 typedef struct
48 BlockNumber prev;
49 uint32 freespace;
50 char tupledata[FLEXIBLE_ARRAY_MEMBER];
51 } GISTNodeBufferPage;
53 #define BUFFER_PAGE_DATA_OFFSET MAXALIGN(offsetof(GISTNodeBufferPage, tupledata))
54 /* Returns free space in node buffer page */
55 #define PAGE_FREE_SPACE(nbp) (nbp->freespace)
56 /* Checks if node buffer page is empty */
57 #define PAGE_IS_EMPTY(nbp) (nbp->freespace == BLCKSZ - BUFFER_PAGE_DATA_OFFSET)
58 /* Checks if node buffers page don't contain sufficient space for index tuple */
59 #define PAGE_NO_SPACE(nbp, itup) (PAGE_FREE_SPACE(nbp) < \
60 MAXALIGN(IndexTupleSize(itup)))
63 * GISTSTATE: information needed for any GiST index operation
65 * This struct retains call info for the index's opclass-specific support
66 * functions (per index column), plus the index's tuple descriptor.
68 * scanCxt holds the GISTSTATE itself as well as any data that lives for the
69 * lifetime of the index operation. We pass this to the support functions
70 * via fn_mcxt, so that they can store scan-lifespan data in it. The
71 * functions are invoked in tempCxt, which is typically short-lifespan
72 * (that is, it's reset after each tuple). However, tempCxt can be the same
73 * as scanCxt if we're not bothering with per-tuple context resets.
75 typedef struct GISTSTATE
77 MemoryContext scanCxt; /* context for scan-lifespan data */
78 MemoryContext tempCxt; /* short-term context for calling functions */
80 TupleDesc leafTupdesc; /* index's tuple descriptor */
81 TupleDesc nonLeafTupdesc; /* truncated tuple descriptor for non-leaf
82 * pages */
83 TupleDesc fetchTupdesc; /* tuple descriptor for tuples returned in an
84 * index-only scan */
86 FmgrInfo consistentFn[INDEX_MAX_KEYS];
87 FmgrInfo unionFn[INDEX_MAX_KEYS];
88 FmgrInfo compressFn[INDEX_MAX_KEYS];
89 FmgrInfo decompressFn[INDEX_MAX_KEYS];
90 FmgrInfo penaltyFn[INDEX_MAX_KEYS];
91 FmgrInfo picksplitFn[INDEX_MAX_KEYS];
92 FmgrInfo equalFn[INDEX_MAX_KEYS];
93 FmgrInfo distanceFn[INDEX_MAX_KEYS];
94 FmgrInfo fetchFn[INDEX_MAX_KEYS];
96 /* Collations to pass to the support functions */
97 Oid supportCollation[INDEX_MAX_KEYS];
98 } GISTSTATE;
102 * During a GiST index search, we must maintain a queue of unvisited items,
103 * which can be either individual heap tuples or whole index pages. If it
104 * is an ordered search, the unvisited items should be visited in distance
105 * order. Unvisited items at the same distance should be visited in
106 * depth-first order, that is heap items first, then lower index pages, then
107 * upper index pages; this rule avoids doing extra work during a search that
108 * ends early due to LIMIT.
110 * To perform an ordered search, we use a pairing heap to manage the
111 * distance-order queue. In a non-ordered search (no order-by operators),
112 * we use it to return heap tuples before unvisited index pages, to
113 * ensure depth-first order, but all entries are otherwise considered
114 * equal.
117 /* Individual heap tuple to be visited */
118 typedef struct GISTSearchHeapItem
120 ItemPointerData heapPtr;
121 bool recheck; /* T if quals must be rechecked */
122 bool recheckDistances; /* T if distances must be rechecked */
123 HeapTuple recontup; /* data reconstructed from the index, used in
124 * index-only scans */
125 OffsetNumber offnum; /* track offset in page to mark tuple as
126 * LP_DEAD */
127 } GISTSearchHeapItem;
129 /* Unvisited item, either index page or heap tuple */
130 typedef struct GISTSearchItem
132 pairingheap_node phNode;
133 BlockNumber blkno; /* index page number, or InvalidBlockNumber */
134 union
136 GistNSN parentlsn; /* parent page's LSN, if index page */
137 /* we must store parentlsn to detect whether a split occurred */
138 GISTSearchHeapItem heap; /* heap info, if heap tuple */
139 } data;
141 /* numberOfOrderBys entries */
142 IndexOrderByDistance distances[FLEXIBLE_ARRAY_MEMBER];
143 } GISTSearchItem;
145 #define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber)
147 #define SizeOfGISTSearchItem(n_distances) \
148 (offsetof(GISTSearchItem, distances) + \
149 sizeof(IndexOrderByDistance) * (n_distances))
152 * GISTScanOpaqueData: private state for a scan of a GiST index
154 typedef struct GISTScanOpaqueData
156 GISTSTATE *giststate; /* index information, see above */
157 Oid *orderByTypes; /* datatypes of ORDER BY expressions */
159 pairingheap *queue; /* queue of unvisited items */
160 MemoryContext queueCxt; /* context holding the queue */
161 bool qual_ok; /* false if qual can never be satisfied */
162 bool firstCall; /* true until first gistgettuple call */
164 /* pre-allocated workspace arrays */
165 IndexOrderByDistance *distances; /* output area for gistindex_keytest */
167 /* info about killed items if any (killedItems is NULL if never used) */
168 OffsetNumber *killedItems; /* offset numbers of killed items */
169 int numKilled; /* number of currently stored items */
170 BlockNumber curBlkno; /* current number of block */
171 GistNSN curPageLSN; /* pos in the WAL stream when page was read */
173 /* In a non-ordered search, returnable heap items are stored here: */
174 GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
175 OffsetNumber nPageData; /* number of valid items in array */
176 OffsetNumber curPageData; /* next item to return */
177 MemoryContext pageDataCxt; /* context holding the fetched tuples, for
178 * index-only scans */
179 } GISTScanOpaqueData;
181 typedef GISTScanOpaqueData *GISTScanOpaque;
183 /* despite the name, gistxlogPage is not part of any xlog record */
184 typedef struct gistxlogPage
186 BlockNumber blkno;
187 int num; /* number of index tuples following */
188 } gistxlogPage;
190 /* SplitedPageLayout - gistSplit function result */
191 typedef struct SplitedPageLayout
193 gistxlogPage block;
194 IndexTupleData *list;
195 int lenlist;
196 IndexTuple itup; /* union key for page */
197 Page page; /* to operate */
198 Buffer buffer; /* to write after all proceed */
200 struct SplitedPageLayout *next;
201 } SplitedPageLayout;
204 * GISTInsertStack used for locking buffers and transfer arguments during
205 * insertion
207 typedef struct GISTInsertStack
209 /* current page */
210 BlockNumber blkno;
211 Buffer buffer;
212 Page page;
215 * log sequence number from page->lsn to recognize page update and compare
216 * it with page's nsn to recognize page split
218 GistNSN lsn;
221 * If set, we split the page while descending the tree to find an
222 * insertion target. It means that we need to retry from the parent,
223 * because the downlink of this page might no longer cover the new key.
225 bool retry_from_parent;
227 /* offset of the downlink in the parent page, that points to this page */
228 OffsetNumber downlinkoffnum;
230 /* pointer to parent */
231 struct GISTInsertStack *parent;
232 } GISTInsertStack;
234 /* Working state and results for multi-column split logic in gistsplit.c */
235 typedef struct GistSplitVector
237 GIST_SPLITVEC splitVector; /* passed to/from user PickSplit method */
239 Datum spl_lattr[INDEX_MAX_KEYS]; /* Union of subkeys in
240 * splitVector.spl_left */
241 bool spl_lisnull[INDEX_MAX_KEYS];
243 Datum spl_rattr[INDEX_MAX_KEYS]; /* Union of subkeys in
244 * splitVector.spl_right */
245 bool spl_risnull[INDEX_MAX_KEYS];
247 bool *spl_dontcare; /* flags tuples which could go to either side
248 * of the split for zero penalty */
249 } GistSplitVector;
251 typedef struct
253 Relation r;
254 Relation heapRel;
255 Size freespace; /* free space to be left */
256 bool is_build;
258 GISTInsertStack *stack;
259 } GISTInsertState;
261 /* root page of a gist index */
262 #define GIST_ROOT_BLKNO 0
265 * Before PostgreSQL 9.1, we used to rely on so-called "invalid tuples" on
266 * inner pages to finish crash recovery of incomplete page splits. If a crash
267 * happened in the middle of a page split, so that the downlink pointers were
268 * not yet inserted, crash recovery inserted a special downlink pointer. The
269 * semantics of an invalid tuple was that it if you encounter one in a scan,
270 * it must always be followed, because we don't know if the tuples on the
271 * child page match or not.
273 * We no longer create such invalid tuples, we now mark the left-half of such
274 * an incomplete split with the F_FOLLOW_RIGHT flag instead, and finish the
275 * split properly the next time we need to insert on that page. To retain
276 * on-disk compatibility for the sake of pg_upgrade, we still store 0xffff as
277 * the offset number of all inner tuples. If we encounter any invalid tuples
278 * with 0xfffe during insertion, we throw an error, though scans still handle
279 * them. You should only encounter invalid tuples if you pg_upgrade a pre-9.1
280 * gist index which already has invalid tuples in it because of a crash. That
281 * should be rare, and you are recommended to REINDEX anyway if you have any
282 * invalid tuples in an index, so throwing an error is as far as we go with
283 * supporting that.
285 #define TUPLE_IS_VALID 0xffff
286 #define TUPLE_IS_INVALID 0xfffe
288 #define GistTupleIsInvalid(itup) ( ItemPointerGetOffsetNumber( &((itup)->t_tid) ) == TUPLE_IS_INVALID )
289 #define GistTupleSetValid(itup) ItemPointerSetOffsetNumber( &((itup)->t_tid), TUPLE_IS_VALID )
295 * A buffer attached to an internal node, used when building an index in
296 * buffering mode.
298 typedef struct
300 BlockNumber nodeBlocknum; /* index block # this buffer is for */
301 int32 blocksCount; /* current # of blocks occupied by buffer */
303 BlockNumber pageBlocknum; /* temporary file block # */
304 GISTNodeBufferPage *pageBuffer; /* in-memory buffer page */
306 /* is this buffer queued for emptying? */
307 bool queuedForEmptying;
309 /* is this a temporary copy, not in the hash table? */
310 bool isTemp;
312 int level; /* 0 == leaf */
313 } GISTNodeBuffer;
316 * Does specified level have buffers? (Beware of multiple evaluation of
317 * arguments.)
319 #define LEVEL_HAS_BUFFERS(nlevel, gfbb) \
320 ((nlevel) != 0 && (nlevel) % (gfbb)->levelStep == 0 && \
321 (nlevel) != (gfbb)->rootlevel)
323 /* Is specified buffer at least half-filled (should be queued for emptying)? */
324 #define BUFFER_HALF_FILLED(nodeBuffer, gfbb) \
325 ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer / 2)
328 * Is specified buffer full? Our buffers can actually grow indefinitely,
329 * beyond the "maximum" size, so this just means whether the buffer has grown
330 * beyond the nominal maximum size.
332 #define BUFFER_OVERFLOWED(nodeBuffer, gfbb) \
333 ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer)
336 * Data structure with general information about build buffers.
338 typedef struct GISTBuildBuffers
340 /* Persistent memory context for the buffers and metadata. */
341 MemoryContext context;
343 BufFile *pfile; /* Temporary file to store buffers in */
344 long nFileBlocks; /* Current size of the temporary file */
347 * resizable array of free blocks.
349 long *freeBlocks;
350 int nFreeBlocks; /* # of currently free blocks in the array */
351 int freeBlocksLen; /* current allocated length of the array */
353 /* Hash for buffers by block number */
354 HTAB *nodeBuffersTab;
356 /* List of buffers scheduled for emptying */
357 List *bufferEmptyingQueue;
360 * Parameters to the buffering build algorithm. levelStep determines which
361 * levels in the tree have buffers, and pagesPerBuffer determines how
362 * large each buffer is.
364 int levelStep;
365 int pagesPerBuffer;
367 /* Array of lists of buffers on each level, for final emptying */
368 List **buffersOnLevels;
369 int buffersOnLevelsLen;
372 * Dynamically-sized array of buffers that currently have their last page
373 * loaded in main memory.
375 GISTNodeBuffer **loadedBuffers;
376 int loadedBuffersCount; /* # of entries in loadedBuffers */
377 int loadedBuffersLen; /* allocated size of loadedBuffers */
379 /* Level of the current root node (= height of the index tree - 1) */
380 int rootlevel;
381 } GISTBuildBuffers;
383 /* GiSTOptions->buffering_mode values */
384 typedef enum GistOptBufferingMode
386 GIST_OPTION_BUFFERING_AUTO,
387 GIST_OPTION_BUFFERING_ON,
388 GIST_OPTION_BUFFERING_OFF
389 } GistOptBufferingMode;
392 * Storage type for GiST's reloptions
394 typedef struct GiSTOptions
396 int32 vl_len_; /* varlena header (do not touch directly!) */
397 int fillfactor; /* page fill factor in percent (0..100) */
398 GistOptBufferingMode buffering_mode; /* buffering build mode */
399 } GiSTOptions;
401 /* gist.c */
402 extern void gistbuildempty(Relation index);
403 extern bool gistinsert(Relation r, Datum *values, bool *isnull,
404 ItemPointer ht_ctid, Relation heapRel,
405 IndexUniqueCheck checkUnique,
406 bool indexUnchanged,
407 struct IndexInfo *indexInfo);
408 extern MemoryContext createTempGistContext(void);
409 extern GISTSTATE *initGISTstate(Relation index);
410 extern void freeGISTstate(GISTSTATE *giststate);
411 extern void gistdoinsert(Relation r,
412 IndexTuple itup,
413 Size freespace,
414 GISTSTATE *giststate,
415 Relation heapRel,
416 bool is_build);
418 /* A List of these is returned from gistplacetopage() in *splitinfo */
419 typedef struct
421 Buffer buf; /* the split page "half" */
422 IndexTuple downlink; /* downlink for this half. */
423 } GISTPageSplitInfo;
425 extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
426 Buffer buffer,
427 IndexTuple *itup, int ntup,
428 OffsetNumber oldoffnum, BlockNumber *newblkno,
429 Buffer leftchildbuf,
430 List **splitinfo,
431 bool markfollowright,
432 Relation heapRel,
433 bool is_build);
435 extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
436 int len, GISTSTATE *giststate);
438 /* gistxlog.c */
439 extern XLogRecPtr gistXLogPageDelete(Buffer buffer,
440 FullTransactionId xid, Buffer parentBuffer,
441 OffsetNumber downlinkOffset);
443 extern void gistXLogPageReuse(Relation rel, BlockNumber blkno,
444 FullTransactionId latestRemovedXid);
446 extern XLogRecPtr gistXLogUpdate(Buffer buffer,
447 OffsetNumber *todelete, int ntodelete,
448 IndexTuple *itup, int ntup,
449 Buffer leftchild);
451 extern XLogRecPtr gistXLogDelete(Buffer buffer, OffsetNumber *todelete,
452 int ntodelete, TransactionId latestRemovedXid);
454 extern XLogRecPtr gistXLogSplit(bool page_is_leaf,
455 SplitedPageLayout *dist,
456 BlockNumber origrlink, GistNSN oldnsn,
457 Buffer leftchild, bool markfollowright);
459 extern XLogRecPtr gistXLogAssignLSN(void);
461 /* gistget.c */
462 extern bool gistgettuple(IndexScanDesc scan, ScanDirection dir);
463 extern int64 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
464 extern bool gistcanreturn(Relation index, int attno);
466 /* gistvalidate.c */
467 extern bool gistvalidate(Oid opclassoid);
468 extern void gistadjustmembers(Oid opfamilyoid,
469 Oid opclassoid,
470 List *operators,
471 List *functions);
473 /* gistutil.c */
475 #define GiSTPageSize \
476 ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GISTPageOpaqueData)) )
478 #define GIST_MIN_FILLFACTOR 10
479 #define GIST_DEFAULT_FILLFACTOR 90
481 extern bytea *gistoptions(Datum reloptions, bool validate);
482 extern bool gistproperty(Oid index_oid, int attno,
483 IndexAMProperty prop, const char *propname,
484 bool *res, bool *isnull);
485 extern bool gistfitpage(IndexTuple *itvec, int len);
486 extern bool gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace);
487 extern void gistcheckpage(Relation rel, Buffer buf);
488 extern Buffer gistNewBuffer(Relation r);
489 extern bool gistPageRecyclable(Page page);
490 extern void gistfillbuffer(Page page, IndexTuple *itup, int len,
491 OffsetNumber off);
492 extern IndexTuple *gistextractpage(Page page, int *len /* out */ );
493 extern IndexTuple *gistjoinvector(IndexTuple *itvec, int *len,
494 IndexTuple *additvec, int addlen);
495 extern IndexTupleData *gistfillitupvec(IndexTuple *vec, int veclen, int *memlen);
497 extern IndexTuple gistunion(Relation r, IndexTuple *itvec,
498 int len, GISTSTATE *giststate);
499 extern IndexTuple gistgetadjusted(Relation r,
500 IndexTuple oldtup,
501 IndexTuple addtup,
502 GISTSTATE *giststate);
503 extern IndexTuple gistFormTuple(GISTSTATE *giststate,
504 Relation r, Datum *attdata, bool *isnull, bool isleaf);
505 extern void gistCompressValues(GISTSTATE *giststate, Relation r,
506 Datum *attdata, bool *isnull, bool isleaf, Datum *compatt);
508 extern OffsetNumber gistchoose(Relation r, Page p,
509 IndexTuple it,
510 GISTSTATE *giststate);
512 extern void GISTInitBuffer(Buffer b, uint32 f);
513 extern void gistinitpage(Page page, uint32 f);
514 extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
515 Datum k, Relation r, Page pg, OffsetNumber o,
516 bool l, bool isNull);
518 extern float gistpenalty(GISTSTATE *giststate, int attno,
519 GISTENTRY *key1, bool isNull1,
520 GISTENTRY *key2, bool isNull2);
521 extern void gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
522 Datum *attr, bool *isnull);
523 extern bool gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b);
524 extern void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
525 OffsetNumber o, GISTENTRY *attdata, bool *isnull);
526 extern HeapTuple gistFetchTuple(GISTSTATE *giststate, Relation r,
527 IndexTuple tuple);
528 extern void gistMakeUnionKey(GISTSTATE *giststate, int attno,
529 GISTENTRY *entry1, bool isnull1,
530 GISTENTRY *entry2, bool isnull2,
531 Datum *dst, bool *dstisnull);
533 extern XLogRecPtr gistGetFakeLSN(Relation rel);
535 /* gistvacuum.c */
536 extern IndexBulkDeleteResult *gistbulkdelete(IndexVacuumInfo *info,
537 IndexBulkDeleteResult *stats,
538 IndexBulkDeleteCallback callback,
539 void *callback_state);
540 extern IndexBulkDeleteResult *gistvacuumcleanup(IndexVacuumInfo *info,
541 IndexBulkDeleteResult *stats);
543 /* gistsplit.c */
544 extern void gistSplitByKey(Relation r, Page page, IndexTuple *itup,
545 int len, GISTSTATE *giststate,
546 GistSplitVector *v,
547 int attno);
549 /* gistbuild.c */
550 extern IndexBuildResult *gistbuild(Relation heap, Relation index,
551 struct IndexInfo *indexInfo);
552 extern void gistValidateBufferingOption(const char *value);
554 /* gistbuildbuffers.c */
555 extern GISTBuildBuffers *gistInitBuildBuffers(int pagesPerBuffer, int levelStep,
556 int maxLevel);
557 extern GISTNodeBuffer *gistGetNodeBuffer(GISTBuildBuffers *gfbb,
558 GISTSTATE *giststate,
559 BlockNumber blkno, int level);
560 extern void gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb,
561 GISTNodeBuffer *nodeBuffer, IndexTuple item);
562 extern bool gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb,
563 GISTNodeBuffer *nodeBuffer, IndexTuple *item);
564 extern void gistFreeBuildBuffers(GISTBuildBuffers *gfbb);
565 extern void gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb,
566 GISTSTATE *giststate, Relation r,
567 int level, Buffer buffer,
568 List *splitinfo);
569 extern void gistUnloadNodeBuffers(GISTBuildBuffers *gfbb);
571 #endif /* GIST_PRIVATE_H */