1 /*-------------------------------------------------------------------------
4 * private declarations for GiST -- declarations related to the
5 * internal implementation of GiST, not the public API
7 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/include/access/gist_private.h
12 *-------------------------------------------------------------------------
14 #ifndef GIST_PRIVATE_H
15 #define GIST_PRIVATE_H
17 #include "access/amapi.h"
18 #include "access/gist.h"
19 #include "access/itup.h"
20 #include "lib/pairingheap.h"
21 #include "storage/bufmgr.h"
22 #include "storage/buffile.h"
23 #include "utils/hsearch.h"
24 #include "access/genam.h"
27 * Maximum number of "halves" a page can be split into in one operation.
28 * Typically a split produces 2 halves, but can be more if keys have very
29 * different lengths, or when inserting multiple keys in one operation (as
30 * when inserting downlinks to an internal node). There is no theoretical
31 * limit on this, but in practice if you get more than a handful page halves
32 * in one split, there's something wrong with the opclass implementation.
33 * GIST_MAX_SPLIT_PAGES is an arbitrary limit on that, used to size some
34 * local arrays used during split. Note that there is also a limit on the
35 * number of buffers that can be held locked at a time, MAX_SIMUL_LWLOCKS,
36 * so if you raise this higher than that limit, you'll just get a different
39 #define GIST_MAX_SPLIT_PAGES 75
41 /* Buffer lock modes */
42 #define GIST_SHARE BUFFER_LOCK_SHARE
43 #define GIST_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE
44 #define GIST_UNLOCK BUFFER_LOCK_UNLOCK
50 char tupledata
[FLEXIBLE_ARRAY_MEMBER
];
53 #define BUFFER_PAGE_DATA_OFFSET MAXALIGN(offsetof(GISTNodeBufferPage, tupledata))
54 /* Returns free space in node buffer page */
55 #define PAGE_FREE_SPACE(nbp) (nbp->freespace)
56 /* Checks if node buffer page is empty */
57 #define PAGE_IS_EMPTY(nbp) (nbp->freespace == BLCKSZ - BUFFER_PAGE_DATA_OFFSET)
58 /* Checks if node buffers page don't contain sufficient space for index tuple */
59 #define PAGE_NO_SPACE(nbp, itup) (PAGE_FREE_SPACE(nbp) < \
60 MAXALIGN(IndexTupleSize(itup)))
63 * GISTSTATE: information needed for any GiST index operation
65 * This struct retains call info for the index's opclass-specific support
66 * functions (per index column), plus the index's tuple descriptor.
68 * scanCxt holds the GISTSTATE itself as well as any data that lives for the
69 * lifetime of the index operation. We pass this to the support functions
70 * via fn_mcxt, so that they can store scan-lifespan data in it. The
71 * functions are invoked in tempCxt, which is typically short-lifespan
72 * (that is, it's reset after each tuple). However, tempCxt can be the same
73 * as scanCxt if we're not bothering with per-tuple context resets.
75 typedef struct GISTSTATE
77 MemoryContext scanCxt
; /* context for scan-lifespan data */
78 MemoryContext tempCxt
; /* short-term context for calling functions */
80 TupleDesc leafTupdesc
; /* index's tuple descriptor */
81 TupleDesc nonLeafTupdesc
; /* truncated tuple descriptor for non-leaf
83 TupleDesc fetchTupdesc
; /* tuple descriptor for tuples returned in an
86 FmgrInfo consistentFn
[INDEX_MAX_KEYS
];
87 FmgrInfo unionFn
[INDEX_MAX_KEYS
];
88 FmgrInfo compressFn
[INDEX_MAX_KEYS
];
89 FmgrInfo decompressFn
[INDEX_MAX_KEYS
];
90 FmgrInfo penaltyFn
[INDEX_MAX_KEYS
];
91 FmgrInfo picksplitFn
[INDEX_MAX_KEYS
];
92 FmgrInfo equalFn
[INDEX_MAX_KEYS
];
93 FmgrInfo distanceFn
[INDEX_MAX_KEYS
];
94 FmgrInfo fetchFn
[INDEX_MAX_KEYS
];
96 /* Collations to pass to the support functions */
97 Oid supportCollation
[INDEX_MAX_KEYS
];
102 * During a GiST index search, we must maintain a queue of unvisited items,
103 * which can be either individual heap tuples or whole index pages. If it
104 * is an ordered search, the unvisited items should be visited in distance
105 * order. Unvisited items at the same distance should be visited in
106 * depth-first order, that is heap items first, then lower index pages, then
107 * upper index pages; this rule avoids doing extra work during a search that
108 * ends early due to LIMIT.
110 * To perform an ordered search, we use a pairing heap to manage the
111 * distance-order queue. In a non-ordered search (no order-by operators),
112 * we use it to return heap tuples before unvisited index pages, to
113 * ensure depth-first order, but all entries are otherwise considered
117 /* Individual heap tuple to be visited */
118 typedef struct GISTSearchHeapItem
120 ItemPointerData heapPtr
;
121 bool recheck
; /* T if quals must be rechecked */
122 bool recheckDistances
; /* T if distances must be rechecked */
123 HeapTuple recontup
; /* data reconstructed from the index, used in
124 * index-only scans */
125 OffsetNumber offnum
; /* track offset in page to mark tuple as
127 } GISTSearchHeapItem
;
129 /* Unvisited item, either index page or heap tuple */
130 typedef struct GISTSearchItem
132 pairingheap_node phNode
;
133 BlockNumber blkno
; /* index page number, or InvalidBlockNumber */
136 GistNSN parentlsn
; /* parent page's LSN, if index page */
137 /* we must store parentlsn to detect whether a split occurred */
138 GISTSearchHeapItem heap
; /* heap info, if heap tuple */
141 /* numberOfOrderBys entries */
142 IndexOrderByDistance distances
[FLEXIBLE_ARRAY_MEMBER
];
145 #define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber)
147 #define SizeOfGISTSearchItem(n_distances) \
148 (offsetof(GISTSearchItem, distances) + \
149 sizeof(IndexOrderByDistance) * (n_distances))
152 * GISTScanOpaqueData: private state for a scan of a GiST index
154 typedef struct GISTScanOpaqueData
156 GISTSTATE
*giststate
; /* index information, see above */
157 Oid
*orderByTypes
; /* datatypes of ORDER BY expressions */
159 pairingheap
*queue
; /* queue of unvisited items */
160 MemoryContext queueCxt
; /* context holding the queue */
161 bool qual_ok
; /* false if qual can never be satisfied */
162 bool firstCall
; /* true until first gistgettuple call */
164 /* pre-allocated workspace arrays */
165 IndexOrderByDistance
*distances
; /* output area for gistindex_keytest */
167 /* info about killed items if any (killedItems is NULL if never used) */
168 OffsetNumber
*killedItems
; /* offset numbers of killed items */
169 int numKilled
; /* number of currently stored items */
170 BlockNumber curBlkno
; /* current number of block */
171 GistNSN curPageLSN
; /* pos in the WAL stream when page was read */
173 /* In a non-ordered search, returnable heap items are stored here: */
174 GISTSearchHeapItem pageData
[BLCKSZ
/ sizeof(IndexTupleData
)];
175 OffsetNumber nPageData
; /* number of valid items in array */
176 OffsetNumber curPageData
; /* next item to return */
177 MemoryContext pageDataCxt
; /* context holding the fetched tuples, for
178 * index-only scans */
179 } GISTScanOpaqueData
;
181 typedef GISTScanOpaqueData
*GISTScanOpaque
;
183 /* despite the name, gistxlogPage is not part of any xlog record */
184 typedef struct gistxlogPage
187 int num
; /* number of index tuples following */
190 /* SplitedPageLayout - gistSplit function result */
191 typedef struct SplitedPageLayout
194 IndexTupleData
*list
;
196 IndexTuple itup
; /* union key for page */
197 Page page
; /* to operate */
198 Buffer buffer
; /* to write after all proceed */
200 struct SplitedPageLayout
*next
;
204 * GISTInsertStack used for locking buffers and transfer arguments during
207 typedef struct GISTInsertStack
215 * log sequence number from page->lsn to recognize page update and compare
216 * it with page's nsn to recognize page split
221 * If set, we split the page while descending the tree to find an
222 * insertion target. It means that we need to retry from the parent,
223 * because the downlink of this page might no longer cover the new key.
225 bool retry_from_parent
;
227 /* offset of the downlink in the parent page, that points to this page */
228 OffsetNumber downlinkoffnum
;
230 /* pointer to parent */
231 struct GISTInsertStack
*parent
;
234 /* Working state and results for multi-column split logic in gistsplit.c */
235 typedef struct GistSplitVector
237 GIST_SPLITVEC splitVector
; /* passed to/from user PickSplit method */
239 Datum spl_lattr
[INDEX_MAX_KEYS
]; /* Union of subkeys in
240 * splitVector.spl_left */
241 bool spl_lisnull
[INDEX_MAX_KEYS
];
243 Datum spl_rattr
[INDEX_MAX_KEYS
]; /* Union of subkeys in
244 * splitVector.spl_right */
245 bool spl_risnull
[INDEX_MAX_KEYS
];
247 bool *spl_dontcare
; /* flags tuples which could go to either side
248 * of the split for zero penalty */
255 Size freespace
; /* free space to be left */
258 GISTInsertStack
*stack
;
261 /* root page of a gist index */
262 #define GIST_ROOT_BLKNO 0
265 * Before PostgreSQL 9.1, we used to rely on so-called "invalid tuples" on
266 * inner pages to finish crash recovery of incomplete page splits. If a crash
267 * happened in the middle of a page split, so that the downlink pointers were
268 * not yet inserted, crash recovery inserted a special downlink pointer. The
269 * semantics of an invalid tuple was that it if you encounter one in a scan,
270 * it must always be followed, because we don't know if the tuples on the
271 * child page match or not.
273 * We no longer create such invalid tuples, we now mark the left-half of such
274 * an incomplete split with the F_FOLLOW_RIGHT flag instead, and finish the
275 * split properly the next time we need to insert on that page. To retain
276 * on-disk compatibility for the sake of pg_upgrade, we still store 0xffff as
277 * the offset number of all inner tuples. If we encounter any invalid tuples
278 * with 0xfffe during insertion, we throw an error, though scans still handle
279 * them. You should only encounter invalid tuples if you pg_upgrade a pre-9.1
280 * gist index which already has invalid tuples in it because of a crash. That
281 * should be rare, and you are recommended to REINDEX anyway if you have any
282 * invalid tuples in an index, so throwing an error is as far as we go with
285 #define TUPLE_IS_VALID 0xffff
286 #define TUPLE_IS_INVALID 0xfffe
288 #define GistTupleIsInvalid(itup) ( ItemPointerGetOffsetNumber( &((itup)->t_tid) ) == TUPLE_IS_INVALID )
289 #define GistTupleSetValid(itup) ItemPointerSetOffsetNumber( &((itup)->t_tid), TUPLE_IS_VALID )
295 * A buffer attached to an internal node, used when building an index in
300 BlockNumber nodeBlocknum
; /* index block # this buffer is for */
301 int32 blocksCount
; /* current # of blocks occupied by buffer */
303 BlockNumber pageBlocknum
; /* temporary file block # */
304 GISTNodeBufferPage
*pageBuffer
; /* in-memory buffer page */
306 /* is this buffer queued for emptying? */
307 bool queuedForEmptying
;
309 /* is this a temporary copy, not in the hash table? */
312 int level
; /* 0 == leaf */
316 * Does specified level have buffers? (Beware of multiple evaluation of
319 #define LEVEL_HAS_BUFFERS(nlevel, gfbb) \
320 ((nlevel) != 0 && (nlevel) % (gfbb)->levelStep == 0 && \
321 (nlevel) != (gfbb)->rootlevel)
323 /* Is specified buffer at least half-filled (should be queued for emptying)? */
324 #define BUFFER_HALF_FILLED(nodeBuffer, gfbb) \
325 ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer / 2)
328 * Is specified buffer full? Our buffers can actually grow indefinitely,
329 * beyond the "maximum" size, so this just means whether the buffer has grown
330 * beyond the nominal maximum size.
332 #define BUFFER_OVERFLOWED(nodeBuffer, gfbb) \
333 ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer)
336 * Data structure with general information about build buffers.
338 typedef struct GISTBuildBuffers
340 /* Persistent memory context for the buffers and metadata. */
341 MemoryContext context
;
343 BufFile
*pfile
; /* Temporary file to store buffers in */
344 long nFileBlocks
; /* Current size of the temporary file */
347 * resizable array of free blocks.
350 int nFreeBlocks
; /* # of currently free blocks in the array */
351 int freeBlocksLen
; /* current allocated length of the array */
353 /* Hash for buffers by block number */
354 HTAB
*nodeBuffersTab
;
356 /* List of buffers scheduled for emptying */
357 List
*bufferEmptyingQueue
;
360 * Parameters to the buffering build algorithm. levelStep determines which
361 * levels in the tree have buffers, and pagesPerBuffer determines how
362 * large each buffer is.
367 /* Array of lists of buffers on each level, for final emptying */
368 List
**buffersOnLevels
;
369 int buffersOnLevelsLen
;
372 * Dynamically-sized array of buffers that currently have their last page
373 * loaded in main memory.
375 GISTNodeBuffer
**loadedBuffers
;
376 int loadedBuffersCount
; /* # of entries in loadedBuffers */
377 int loadedBuffersLen
; /* allocated size of loadedBuffers */
379 /* Level of the current root node (= height of the index tree - 1) */
383 /* GiSTOptions->buffering_mode values */
384 typedef enum GistOptBufferingMode
386 GIST_OPTION_BUFFERING_AUTO
,
387 GIST_OPTION_BUFFERING_ON
,
388 GIST_OPTION_BUFFERING_OFF
389 } GistOptBufferingMode
;
392 * Storage type for GiST's reloptions
394 typedef struct GiSTOptions
396 int32 vl_len_
; /* varlena header (do not touch directly!) */
397 int fillfactor
; /* page fill factor in percent (0..100) */
398 GistOptBufferingMode buffering_mode
; /* buffering build mode */
402 extern void gistbuildempty(Relation index
);
403 extern bool gistinsert(Relation r
, Datum
*values
, bool *isnull
,
404 ItemPointer ht_ctid
, Relation heapRel
,
405 IndexUniqueCheck checkUnique
,
407 struct IndexInfo
*indexInfo
);
408 extern MemoryContext
createTempGistContext(void);
409 extern GISTSTATE
*initGISTstate(Relation index
);
410 extern void freeGISTstate(GISTSTATE
*giststate
);
411 extern void gistdoinsert(Relation r
,
414 GISTSTATE
*giststate
,
418 /* A List of these is returned from gistplacetopage() in *splitinfo */
421 Buffer buf
; /* the split page "half" */
422 IndexTuple downlink
; /* downlink for this half. */
425 extern bool gistplacetopage(Relation rel
, Size freespace
, GISTSTATE
*giststate
,
427 IndexTuple
*itup
, int ntup
,
428 OffsetNumber oldoffnum
, BlockNumber
*newblkno
,
431 bool markfollowright
,
435 extern SplitedPageLayout
*gistSplit(Relation r
, Page page
, IndexTuple
*itup
,
436 int len
, GISTSTATE
*giststate
);
439 extern XLogRecPtr
gistXLogPageDelete(Buffer buffer
,
440 FullTransactionId xid
, Buffer parentBuffer
,
441 OffsetNumber downlinkOffset
);
443 extern void gistXLogPageReuse(Relation rel
, BlockNumber blkno
,
444 FullTransactionId latestRemovedXid
);
446 extern XLogRecPtr
gistXLogUpdate(Buffer buffer
,
447 OffsetNumber
*todelete
, int ntodelete
,
448 IndexTuple
*itup
, int ntup
,
451 extern XLogRecPtr
gistXLogDelete(Buffer buffer
, OffsetNumber
*todelete
,
452 int ntodelete
, TransactionId latestRemovedXid
);
454 extern XLogRecPtr
gistXLogSplit(bool page_is_leaf
,
455 SplitedPageLayout
*dist
,
456 BlockNumber origrlink
, GistNSN oldnsn
,
457 Buffer leftchild
, bool markfollowright
);
459 extern XLogRecPtr
gistXLogAssignLSN(void);
462 extern bool gistgettuple(IndexScanDesc scan
, ScanDirection dir
);
463 extern int64
gistgetbitmap(IndexScanDesc scan
, TIDBitmap
*tbm
);
464 extern bool gistcanreturn(Relation index
, int attno
);
467 extern bool gistvalidate(Oid opclassoid
);
468 extern void gistadjustmembers(Oid opfamilyoid
,
475 #define GiSTPageSize \
476 ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GISTPageOpaqueData)) )
478 #define GIST_MIN_FILLFACTOR 10
479 #define GIST_DEFAULT_FILLFACTOR 90
481 extern bytea
*gistoptions(Datum reloptions
, bool validate
);
482 extern bool gistproperty(Oid index_oid
, int attno
,
483 IndexAMProperty prop
, const char *propname
,
484 bool *res
, bool *isnull
);
485 extern bool gistfitpage(IndexTuple
*itvec
, int len
);
486 extern bool gistnospace(Page page
, IndexTuple
*itvec
, int len
, OffsetNumber todelete
, Size freespace
);
487 extern void gistcheckpage(Relation rel
, Buffer buf
);
488 extern Buffer
gistNewBuffer(Relation r
);
489 extern bool gistPageRecyclable(Page page
);
490 extern void gistfillbuffer(Page page
, IndexTuple
*itup
, int len
,
492 extern IndexTuple
*gistextractpage(Page page
, int *len
/* out */ );
493 extern IndexTuple
*gistjoinvector(IndexTuple
*itvec
, int *len
,
494 IndexTuple
*additvec
, int addlen
);
495 extern IndexTupleData
*gistfillitupvec(IndexTuple
*vec
, int veclen
, int *memlen
);
497 extern IndexTuple
gistunion(Relation r
, IndexTuple
*itvec
,
498 int len
, GISTSTATE
*giststate
);
499 extern IndexTuple
gistgetadjusted(Relation r
,
502 GISTSTATE
*giststate
);
503 extern IndexTuple
gistFormTuple(GISTSTATE
*giststate
,
504 Relation r
, Datum
*attdata
, bool *isnull
, bool isleaf
);
505 extern void gistCompressValues(GISTSTATE
*giststate
, Relation r
,
506 Datum
*attdata
, bool *isnull
, bool isleaf
, Datum
*compatt
);
508 extern OffsetNumber
gistchoose(Relation r
, Page p
,
510 GISTSTATE
*giststate
);
512 extern void GISTInitBuffer(Buffer b
, uint32 f
);
513 extern void gistinitpage(Page page
, uint32 f
);
514 extern void gistdentryinit(GISTSTATE
*giststate
, int nkey
, GISTENTRY
*e
,
515 Datum k
, Relation r
, Page pg
, OffsetNumber o
,
516 bool l
, bool isNull
);
518 extern float gistpenalty(GISTSTATE
*giststate
, int attno
,
519 GISTENTRY
*key1
, bool isNull1
,
520 GISTENTRY
*key2
, bool isNull2
);
521 extern void gistMakeUnionItVec(GISTSTATE
*giststate
, IndexTuple
*itvec
, int len
,
522 Datum
*attr
, bool *isnull
);
523 extern bool gistKeyIsEQ(GISTSTATE
*giststate
, int attno
, Datum a
, Datum b
);
524 extern void gistDeCompressAtt(GISTSTATE
*giststate
, Relation r
, IndexTuple tuple
, Page p
,
525 OffsetNumber o
, GISTENTRY
*attdata
, bool *isnull
);
526 extern HeapTuple
gistFetchTuple(GISTSTATE
*giststate
, Relation r
,
528 extern void gistMakeUnionKey(GISTSTATE
*giststate
, int attno
,
529 GISTENTRY
*entry1
, bool isnull1
,
530 GISTENTRY
*entry2
, bool isnull2
,
531 Datum
*dst
, bool *dstisnull
);
533 extern XLogRecPtr
gistGetFakeLSN(Relation rel
);
536 extern IndexBulkDeleteResult
*gistbulkdelete(IndexVacuumInfo
*info
,
537 IndexBulkDeleteResult
*stats
,
538 IndexBulkDeleteCallback callback
,
539 void *callback_state
);
540 extern IndexBulkDeleteResult
*gistvacuumcleanup(IndexVacuumInfo
*info
,
541 IndexBulkDeleteResult
*stats
);
544 extern void gistSplitByKey(Relation r
, Page page
, IndexTuple
*itup
,
545 int len
, GISTSTATE
*giststate
,
550 extern IndexBuildResult
*gistbuild(Relation heap
, Relation index
,
551 struct IndexInfo
*indexInfo
);
552 extern void gistValidateBufferingOption(const char *value
);
554 /* gistbuildbuffers.c */
555 extern GISTBuildBuffers
*gistInitBuildBuffers(int pagesPerBuffer
, int levelStep
,
557 extern GISTNodeBuffer
*gistGetNodeBuffer(GISTBuildBuffers
*gfbb
,
558 GISTSTATE
*giststate
,
559 BlockNumber blkno
, int level
);
560 extern void gistPushItupToNodeBuffer(GISTBuildBuffers
*gfbb
,
561 GISTNodeBuffer
*nodeBuffer
, IndexTuple item
);
562 extern bool gistPopItupFromNodeBuffer(GISTBuildBuffers
*gfbb
,
563 GISTNodeBuffer
*nodeBuffer
, IndexTuple
*item
);
564 extern void gistFreeBuildBuffers(GISTBuildBuffers
*gfbb
);
565 extern void gistRelocateBuildBuffersOnSplit(GISTBuildBuffers
*gfbb
,
566 GISTSTATE
*giststate
, Relation r
,
567 int level
, Buffer buffer
,
569 extern void gistUnloadNodeBuffers(GISTBuildBuffers
*gfbb
);
571 #endif /* GIST_PRIVATE_H */