/*-------------------------------------------------------------------------
 *
 *	  general index access method routines
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * INTERFACE ROUTINES
 *		index_open		- open an index relation by relation OID
 *		index_close		- close an index relation
 *		index_beginscan - start a scan of an index with amgettuple
 *		index_beginscan_bitmap - start a scan of an index with amgetbitmap
 *		index_rescan	- restart a scan of an index
 *		index_endscan	- end a scan
 *		index_insert	- insert an index tuple into a relation
 *		index_markpos	- mark a scan position
 *		index_restrpos	- restore a scan position
 *		index_getnext	- get the next tuple from a scan
 *		index_getbitmap - get all tuples from a scan
 *		index_bulk_delete	- bulk deletion of index tuples
 *		index_vacuum_cleanup	- post-deletion cleanup of an index
 *		index_getprocid - get a support procedure OID
 *		index_getprocinfo - get a support procedure's lookup info
 *
 * NOTES
 *		This file contains the index_ routines which used
 *		to be a scattered collection of stuff in access/genam.
 *
 *		Scans are implemented as follows:
 *
 *		`0' represents an invalid item pointer.
 *		`-' represents an unknown item pointer.
 *		`X' represents a known item pointer.
 *		`+' represents known or invalid item pointers.
 *		`*' represents any item pointers.
 *
 *		State is represented by a triple of these symbols in the order of
 *		previous, current, next.  Note that the case of reverse scans works
 *		identically.
 *
 *				State	Result
 *		(1)		+ + -	+ 0 0			(if the next item pointer is invalid)
 *		(2)				+ X -			(otherwise)
 *		(3)		* 0 0	* 0 0			(no change)
 *		(4)		+ X 0	X 0 0			(shift)
 *		(5)		* + X	+ X -			(shift, add unknown)
 *
 *		All other states cannot occur.
 *
 *		Note: It would be possible to cache the status of the previous and
 *			  next item pointer using the flags.
 *
 *-------------------------------------------------------------------------
 */
65 #include "access/relscan.h"
66 #include "access/transam.h"
68 #include "storage/bufmgr.h"
69 #include "storage/lmgr.h"
70 #include "utils/relcache.h"
71 #include "utils/snapmgr.h"
72 #include "utils/tqual.h"
75 /* ----------------------------------------------------------------
76 * macros used in index_ routines
77 * ----------------------------------------------------------------
79 #define RELATION_CHECKS \
81 AssertMacro(RelationIsValid(indexRelation)), \
82 AssertMacro(PointerIsValid(indexRelation->rd_am)) \
87 AssertMacro(IndexScanIsValid(scan)), \
88 AssertMacro(RelationIsValid(scan->indexRelation)), \
89 AssertMacro(PointerIsValid(scan->indexRelation->rd_am)) \
92 #define GET_REL_PROCEDURE(pname) \
94 procedure = &indexRelation->rd_aminfo->pname; \
95 if (!OidIsValid(procedure->fn_oid)) \
97 RegProcedure procOid = indexRelation->rd_am->pname; \
98 if (!RegProcedureIsValid(procOid)) \
99 elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
100 fmgr_info_cxt(procOid, procedure, indexRelation->rd_indexcxt); \
104 #define GET_SCAN_PROCEDURE(pname) \
106 procedure = &scan->indexRelation->rd_aminfo->pname; \
107 if (!OidIsValid(procedure->fn_oid)) \
109 RegProcedure procOid = scan->indexRelation->rd_am->pname; \
110 if (!RegProcedureIsValid(procOid)) \
111 elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
112 fmgr_info_cxt(procOid, procedure, scan->indexRelation->rd_indexcxt); \
116 static IndexScanDesc
index_beginscan_internal(Relation indexRelation
,
117 int nkeys
, ScanKey key
);
120 /* ----------------------------------------------------------------
121 * index_ interface functions
122 * ----------------------------------------------------------------
126 * index_open - open an index relation by relation OID
128 * If lockmode is not "NoLock", the specified kind of lock is
129 * obtained on the index. (Generally, NoLock should only be
130 * used if the caller knows it has some appropriate lock on the
133 * An error is raised if the index does not exist.
135 * This is a convenience routine adapted for indexscan use.
136 * Some callers may prefer to use relation_open directly.
140 index_open(Oid relationId
, LOCKMODE lockmode
)
144 r
= relation_open(relationId
, lockmode
);
146 if (r
->rd_rel
->relkind
!= RELKIND_INDEX
)
148 (errcode(ERRCODE_WRONG_OBJECT_TYPE
),
149 errmsg("\"%s\" is not an index",
150 RelationGetRelationName(r
))));
156 * index_close - close an index relation
158 * If lockmode is not "NoLock", we then release the specified lock.
160 * Note that it is often sensible to hold a lock beyond index_close;
161 * in that case, the lock is released automatically at xact end.
165 index_close(Relation relation
, LOCKMODE lockmode
)
167 LockRelId relid
= relation
->rd_lockInfo
.lockRelId
;
169 Assert(lockmode
>= NoLock
&& lockmode
< MAX_LOCKMODES
);
171 /* The relcache does the real work... */
172 RelationClose(relation
);
174 if (lockmode
!= NoLock
)
175 UnlockRelationId(&relid
, lockmode
);
179 * index_insert - insert an index tuple into a relation
183 index_insert(Relation indexRelation
,
186 ItemPointer heap_t_ctid
,
187 Relation heapRelation
,
188 bool check_uniqueness
)
193 GET_REL_PROCEDURE(aminsert
);
196 * have the am's insert proc do all the work.
198 return DatumGetBool(FunctionCall6(procedure
,
199 PointerGetDatum(indexRelation
),
200 PointerGetDatum(values
),
201 PointerGetDatum(isnull
),
202 PointerGetDatum(heap_t_ctid
),
203 PointerGetDatum(heapRelation
),
204 BoolGetDatum(check_uniqueness
)));
208 * index_beginscan - start a scan of an index with amgettuple
210 * Caller must be holding suitable locks on the heap and the index.
213 index_beginscan(Relation heapRelation
,
214 Relation indexRelation
,
216 int nkeys
, ScanKey key
)
220 scan
= index_beginscan_internal(indexRelation
, nkeys
, key
);
223 * Save additional parameters into the scandesc. Everything else was set
224 * up by RelationGetIndexScan.
226 scan
->heapRelation
= heapRelation
;
227 scan
->xs_snapshot
= snapshot
;
233 * index_beginscan_bitmap - start a scan of an index with amgetbitmap
235 * As above, caller had better be holding some lock on the parent heap
236 * relation, even though it's not explicitly mentioned here.
239 index_beginscan_bitmap(Relation indexRelation
,
241 int nkeys
, ScanKey key
)
245 scan
= index_beginscan_internal(indexRelation
, nkeys
, key
);
248 * Save additional parameters into the scandesc. Everything else was set
249 * up by RelationGetIndexScan.
251 scan
->xs_snapshot
= snapshot
;
257 * index_beginscan_internal --- common code for index_beginscan variants
260 index_beginscan_internal(Relation indexRelation
,
261 int nkeys
, ScanKey key
)
267 GET_REL_PROCEDURE(ambeginscan
);
270 * We hold a reference count to the relcache entry throughout the scan.
272 RelationIncrementReferenceCount(indexRelation
);
275 * Tell the AM to open a scan.
277 scan
= (IndexScanDesc
)
278 DatumGetPointer(FunctionCall3(procedure
,
279 PointerGetDatum(indexRelation
),
280 Int32GetDatum(nkeys
),
281 PointerGetDatum(key
)));
287 * index_rescan - (re)start a scan of an index
289 * The caller may specify a new set of scankeys (but the number of keys
290 * cannot change). To restart the scan without changing keys, pass NULL
293 * Note that this is also called when first starting an indexscan;
294 * see RelationGetIndexScan. Keys *must* be passed in that case,
295 * unless scan->numberOfKeys is zero.
299 index_rescan(IndexScanDesc scan
, ScanKey key
)
304 GET_SCAN_PROCEDURE(amrescan
);
306 /* Release any held pin on a heap page */
307 if (BufferIsValid(scan
->xs_cbuf
))
309 ReleaseBuffer(scan
->xs_cbuf
);
310 scan
->xs_cbuf
= InvalidBuffer
;
313 scan
->xs_next_hot
= InvalidOffsetNumber
;
315 scan
->kill_prior_tuple
= false; /* for safety */
317 FunctionCall2(procedure
,
318 PointerGetDatum(scan
),
319 PointerGetDatum(key
));
323 * index_endscan - end a scan
327 index_endscan(IndexScanDesc scan
)
332 GET_SCAN_PROCEDURE(amendscan
);
334 /* Release any held pin on a heap page */
335 if (BufferIsValid(scan
->xs_cbuf
))
337 ReleaseBuffer(scan
->xs_cbuf
);
338 scan
->xs_cbuf
= InvalidBuffer
;
341 /* End the AM's scan */
342 FunctionCall1(procedure
, PointerGetDatum(scan
));
344 /* Release index refcount acquired by index_beginscan */
345 RelationDecrementReferenceCount(scan
->indexRelation
);
347 /* Release the scan data structure itself */
352 * index_markpos - mark a scan position
356 index_markpos(IndexScanDesc scan
)
361 GET_SCAN_PROCEDURE(ammarkpos
);
363 FunctionCall1(procedure
, PointerGetDatum(scan
));
367 * index_restrpos - restore a scan position
369 * NOTE: this only restores the internal scan state of the index AM.
370 * The current result tuple (scan->xs_ctup) doesn't change. See comments
371 * for ExecRestrPos().
373 * NOTE: in the presence of HOT chains, mark/restore only works correctly
374 * if the scan's snapshot is MVCC-safe; that ensures that there's at most one
375 * returnable tuple in each HOT chain, and so restoring the prior state at the
376 * granularity of the index AM is sufficient. Since the only current user
377 * of mark/restore functionality is nodeMergejoin.c, this effectively means
378 * that merge-join plans only work for MVCC snapshots. This could be fixed
379 * if necessary, but for now it seems unimportant.
383 index_restrpos(IndexScanDesc scan
)
387 Assert(IsMVCCSnapshot(scan
->xs_snapshot
));
390 GET_SCAN_PROCEDURE(amrestrpos
);
392 scan
->xs_next_hot
= InvalidOffsetNumber
;
394 scan
->kill_prior_tuple
= false; /* for safety */
396 FunctionCall1(procedure
, PointerGetDatum(scan
));
400 * index_getnext - get the next heap tuple from a scan
402 * The result is the next heap tuple satisfying the scan keys and the
403 * snapshot, or NULL if no more matching tuples exist. On success,
404 * the buffer containing the heap tuple is pinned (the pin will be dropped
405 * at the next index_getnext or index_endscan).
407 * Note: caller must check scan->xs_recheck, and perform rechecking of the
408 * scan keys if required. We do not do that here because we don't have
409 * enough information to do it efficiently in the general case.
413 index_getnext(IndexScanDesc scan
, ScanDirection direction
)
415 HeapTuple heapTuple
= &scan
->xs_ctup
;
416 ItemPointer tid
= &heapTuple
->t_self
;
420 GET_SCAN_PROCEDURE(amgettuple
);
422 Assert(TransactionIdIsValid(RecentGlobalXmin
));
425 * We always reset xs_hot_dead; if we are here then either we are just
426 * starting the scan, or we previously returned a visible tuple, and in
427 * either case it's inappropriate to kill the prior index entry.
429 scan
->xs_hot_dead
= false;
437 if (scan
->xs_next_hot
!= InvalidOffsetNumber
)
440 * We are resuming scan of a HOT chain after having returned an
441 * earlier member. Must still hold pin on current heap page.
443 Assert(BufferIsValid(scan
->xs_cbuf
));
444 Assert(ItemPointerGetBlockNumber(tid
) ==
445 BufferGetBlockNumber(scan
->xs_cbuf
));
446 Assert(TransactionIdIsValid(scan
->xs_prev_xmax
));
447 offnum
= scan
->xs_next_hot
;
448 at_chain_start
= false;
449 scan
->xs_next_hot
= InvalidOffsetNumber
;
457 * If we scanned a whole HOT chain and found only dead tuples,
458 * tell index AM to kill its entry for that TID.
460 scan
->kill_prior_tuple
= scan
->xs_hot_dead
;
463 * The AM's gettuple proc finds the next index entry matching the
464 * scan keys, and puts the TID in xs_ctup.t_self (ie, *tid).
465 * It should also set scan->xs_recheck, though we pay no
466 * attention to that here.
468 found
= DatumGetBool(FunctionCall2(procedure
,
469 PointerGetDatum(scan
),
470 Int32GetDatum(direction
)));
472 /* Reset kill flag immediately for safety */
473 scan
->kill_prior_tuple
= false;
475 /* If we're out of index entries, break out of outer loop */
479 pgstat_count_index_tuples(scan
->indexRelation
, 1);
481 /* Switch to correct buffer if we don't have it already */
482 prev_buf
= scan
->xs_cbuf
;
483 scan
->xs_cbuf
= ReleaseAndReadBuffer(scan
->xs_cbuf
,
485 ItemPointerGetBlockNumber(tid
));
488 * Prune page, but only if we weren't already on this page
490 if (prev_buf
!= scan
->xs_cbuf
)
491 heap_page_prune_opt(scan
->heapRelation
, scan
->xs_cbuf
,
494 /* Prepare to scan HOT chain starting at index-referenced offnum */
495 offnum
= ItemPointerGetOffsetNumber(tid
);
496 at_chain_start
= true;
498 /* We don't know what the first tuple's xmin should be */
499 scan
->xs_prev_xmax
= InvalidTransactionId
;
501 /* Initialize flag to detect if all entries are dead */
502 scan
->xs_hot_dead
= true;
505 /* Obtain share-lock on the buffer so we can examine visibility */
506 LockBuffer(scan
->xs_cbuf
, BUFFER_LOCK_SHARE
);
508 dp
= (Page
) BufferGetPage(scan
->xs_cbuf
);
510 /* Scan through possible multiple members of HOT-chain */
516 /* check for bogus TID */
517 if (offnum
< FirstOffsetNumber
||
518 offnum
> PageGetMaxOffsetNumber(dp
))
521 lp
= PageGetItemId(dp
, offnum
);
523 /* check for unused, dead, or redirected items */
524 if (!ItemIdIsNormal(lp
))
526 /* We should only see a redirect at start of chain */
527 if (ItemIdIsRedirected(lp
) && at_chain_start
)
529 /* Follow the redirect */
530 offnum
= ItemIdGetRedirect(lp
);
531 at_chain_start
= false;
534 /* else must be end of chain */
539 * We must initialize all of *heapTuple (ie, scan->xs_ctup) since
540 * it is returned to the executor on success.
542 heapTuple
->t_data
= (HeapTupleHeader
) PageGetItem(dp
, lp
);
543 heapTuple
->t_len
= ItemIdGetLength(lp
);
544 ItemPointerSetOffsetNumber(tid
, offnum
);
545 heapTuple
->t_tableOid
= RelationGetRelid(scan
->heapRelation
);
546 ctid
= &heapTuple
->t_data
->t_ctid
;
549 * Shouldn't see a HEAP_ONLY tuple at chain start. (This test
550 * should be unnecessary, since the chain root can't be removed
551 * while we have pin on the index entry, but let's make it
554 if (at_chain_start
&& HeapTupleIsHeapOnly(heapTuple
))
558 * The xmin should match the previous xmax value, else chain is
559 * broken. (Note: this test is not optional because it protects
560 * us against the case where the prior chain member's xmax aborted
561 * since we looked at it.)
563 if (TransactionIdIsValid(scan
->xs_prev_xmax
) &&
564 !TransactionIdEquals(scan
->xs_prev_xmax
,
565 HeapTupleHeaderGetXmin(heapTuple
->t_data
)))
568 /* If it's visible per the snapshot, we must return it */
569 if (HeapTupleSatisfiesVisibility(heapTuple
, scan
->xs_snapshot
,
573 * If the snapshot is MVCC, we know that it could accept at
574 * most one member of the HOT chain, so we can skip examining
575 * any more members. Otherwise, check for continuation of the
576 * HOT-chain, and set state for next time.
578 if (IsMVCCSnapshot(scan
->xs_snapshot
))
579 scan
->xs_next_hot
= InvalidOffsetNumber
;
580 else if (HeapTupleIsHotUpdated(heapTuple
))
582 Assert(ItemPointerGetBlockNumber(ctid
) ==
583 ItemPointerGetBlockNumber(tid
));
584 scan
->xs_next_hot
= ItemPointerGetOffsetNumber(ctid
);
585 scan
->xs_prev_xmax
= HeapTupleHeaderGetXmax(heapTuple
->t_data
);
588 scan
->xs_next_hot
= InvalidOffsetNumber
;
590 LockBuffer(scan
->xs_cbuf
, BUFFER_LOCK_UNLOCK
);
592 pgstat_count_heap_fetch(scan
->indexRelation
);
598 * If we can't see it, maybe no one else can either. Check to see
599 * if the tuple is dead to all transactions. If we find that all
600 * the tuples in the HOT chain are dead, we'll signal the index AM
601 * to not return that TID on future indexscans.
603 if (scan
->xs_hot_dead
&&
604 HeapTupleSatisfiesVacuum(heapTuple
->t_data
, RecentGlobalXmin
,
605 scan
->xs_cbuf
) != HEAPTUPLE_DEAD
)
606 scan
->xs_hot_dead
= false;
609 * Check to see if HOT chain continues past this tuple; if so
610 * fetch the next offnum (we don't bother storing it into
611 * xs_next_hot, but must store xs_prev_xmax), and loop around.
613 if (HeapTupleIsHotUpdated(heapTuple
))
615 Assert(ItemPointerGetBlockNumber(ctid
) ==
616 ItemPointerGetBlockNumber(tid
));
617 offnum
= ItemPointerGetOffsetNumber(ctid
);
618 at_chain_start
= false;
619 scan
->xs_prev_xmax
= HeapTupleHeaderGetXmax(heapTuple
->t_data
);
622 break; /* end of chain */
623 } /* loop over a single HOT chain */
625 LockBuffer(scan
->xs_cbuf
, BUFFER_LOCK_UNLOCK
);
627 /* Loop around to ask index AM for another TID */
628 scan
->xs_next_hot
= InvalidOffsetNumber
;
631 /* Release any held pin on a heap page */
632 if (BufferIsValid(scan
->xs_cbuf
))
634 ReleaseBuffer(scan
->xs_cbuf
);
635 scan
->xs_cbuf
= InvalidBuffer
;
638 return NULL
; /* failure exit */
642 * index_getbitmap - get all tuples at once from an index scan
644 * Adds the TIDs of all heap tuples satisfying the scan keys to a bitmap.
645 * Since there's no interlock between the index scan and the eventual heap
646 * access, this is only safe to use with MVCC-based snapshots: the heap
647 * item slot could have been replaced by a newer tuple by the time we get
650 * Returns the number of matching tuples found.
654 index_getbitmap(IndexScanDesc scan
, TIDBitmap
*bitmap
)
660 GET_SCAN_PROCEDURE(amgetbitmap
);
662 /* just make sure this is false... */
663 scan
->kill_prior_tuple
= false;
666 * have the am's getbitmap proc do all the work.
668 ntids
= DatumGetInt64(FunctionCall2(procedure
,
669 PointerGetDatum(scan
),
670 PointerGetDatum(bitmap
)));
672 pgstat_count_index_tuples(scan
->indexRelation
, ntids
);
678 * index_bulk_delete - do mass deletion of index entries
680 * callback routine tells whether a given main-heap tuple is
683 * return value is an optional palloc'd struct of statistics
686 IndexBulkDeleteResult
*
687 index_bulk_delete(IndexVacuumInfo
*info
,
688 IndexBulkDeleteResult
*stats
,
689 IndexBulkDeleteCallback callback
,
690 void *callback_state
)
692 Relation indexRelation
= info
->index
;
694 IndexBulkDeleteResult
*result
;
697 GET_REL_PROCEDURE(ambulkdelete
);
699 result
= (IndexBulkDeleteResult
*)
700 DatumGetPointer(FunctionCall4(procedure
,
701 PointerGetDatum(info
),
702 PointerGetDatum(stats
),
703 PointerGetDatum((Pointer
) callback
),
704 PointerGetDatum(callback_state
)));
710 * index_vacuum_cleanup - do post-deletion cleanup of an index
712 * return value is an optional palloc'd struct of statistics
715 IndexBulkDeleteResult
*
716 index_vacuum_cleanup(IndexVacuumInfo
*info
,
717 IndexBulkDeleteResult
*stats
)
719 Relation indexRelation
= info
->index
;
721 IndexBulkDeleteResult
*result
;
724 GET_REL_PROCEDURE(amvacuumcleanup
);
726 result
= (IndexBulkDeleteResult
*)
727 DatumGetPointer(FunctionCall2(procedure
,
728 PointerGetDatum(info
),
729 PointerGetDatum(stats
)));
737 * Index access methods typically require support routines that are
738 * not directly the implementation of any WHERE-clause query operator
739 * and so cannot be kept in pg_amop. Instead, such routines are kept
740 * in pg_amproc. These registered procedure OIDs are assigned numbers
741 * according to a convention established by the access method.
742 * The general index code doesn't know anything about the routines
743 * involved; it just builds an ordered list of them for
744 * each attribute on which an index is defined.
746 * As of Postgres 8.3, support routines within an operator family
747 * are further subdivided by the "left type" and "right type" of the
748 * query operator(s) that they support. The "default" functions for a
749 * particular indexed attribute are those with both types equal to
750 * the index opclass' opcintype (note that this is subtly different
751 * from the indexed attribute's own type: it may be a binary-compatible
752 * type instead). Only the default functions are stored in relcache
753 * entries --- access methods can use the syscache to look up non-default
756 * This routine returns the requested default procedure OID for a
757 * particular indexed attribute.
761 index_getprocid(Relation irel
,
769 nproc
= irel
->rd_am
->amsupport
;
771 Assert(procnum
> 0 && procnum
<= (uint16
) nproc
);
773 procindex
= (nproc
* (attnum
- 1)) + (procnum
- 1);
775 loc
= irel
->rd_support
;
779 return loc
[procindex
];
785 * This routine allows index AMs to keep fmgr lookup info for
786 * support procs in the relcache. As above, only the "default"
787 * functions for any particular indexed attribute are cached.
789 * Note: the return value points into cached data that will be lost during
790 * any relcache rebuild! Therefore, either use the callinfo right away,
791 * or save it only after having acquired some type of lock on the index rel.
795 index_getprocinfo(Relation irel
,
803 nproc
= irel
->rd_am
->amsupport
;
805 Assert(procnum
> 0 && procnum
<= (uint16
) nproc
);
807 procindex
= (nproc
* (attnum
- 1)) + (procnum
- 1);
809 locinfo
= irel
->rd_supportinfo
;
811 Assert(locinfo
!= NULL
);
813 locinfo
+= procindex
;
815 /* Initialize the lookup info if first time through */
816 if (locinfo
->fn_oid
== InvalidOid
)
818 RegProcedure
*loc
= irel
->rd_support
;
823 procId
= loc
[procindex
];
826 * Complain if function was not found during IndexSupportInitialize.
827 * This should not happen unless the system tables contain bogus
828 * entries for the index opclass. (If an AM wants to allow a support
829 * function to be optional, it can use index_getprocid.)
831 if (!RegProcedureIsValid(procId
))
832 elog(ERROR
, "missing support function %d for attribute %d of index \"%s\"",
833 procnum
, attnum
, RelationGetRelationName(irel
));
835 fmgr_info_cxt(procId
, locinfo
, irel
->rd_indexcxt
);