1 /*-------------------------------------------------------------------------
4 * search code for postgres hash tables
6 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
13 *-------------------------------------------------------------------------
17 #include "access/hash.h"
18 #include "access/relscan.h"
19 #include "miscadmin.h"
21 #include "storage/bufmgr.h"
22 #include "utils/rel.h"
26 * _hash_next() -- Get the next item in a scan.
28 * On entry, we have a valid hashso_curpos in the scan, and a
29 * pin and read lock on the page that contains that item.
30 * We find the next item in the scan, if any.
31 * On success exit, we have the page containing the next item
35 _hash_next(IndexScanDesc scan
, ScanDirection dir
)
37 Relation rel
= scan
->indexRelation
;
38 HashScanOpaque so
= (HashScanOpaque
) scan
->opaque
;
45 /* we still have the buffer pinned and read-locked */
46 buf
= so
->hashso_curbuf
;
47 Assert(BufferIsValid(buf
));
50 * step to next valid tuple.
52 if (!_hash_step(scan
, &buf
, dir
))
55 /* if we're here, _hash_step found a valid tuple */
56 current
= &(so
->hashso_curpos
);
57 offnum
= ItemPointerGetOffsetNumber(current
);
58 _hash_checkpage(rel
, buf
, LH_BUCKET_PAGE
| LH_OVERFLOW_PAGE
);
59 page
= BufferGetPage(buf
);
60 itup
= (IndexTuple
) PageGetItem(page
, PageGetItemId(page
, offnum
));
61 scan
->xs_ctup
.t_self
= itup
->t_tid
;
67 * Advance to next page in a bucket, if any.
70 _hash_readnext(Relation rel
,
71 Buffer
*bufp
, Page
*pagep
, HashPageOpaque
*opaquep
)
75 blkno
= (*opaquep
)->hasho_nextblkno
;
76 _hash_relbuf(rel
, *bufp
);
77 *bufp
= InvalidBuffer
;
78 /* check for interrupts while we're not holding any buffer lock */
79 CHECK_FOR_INTERRUPTS();
80 if (BlockNumberIsValid(blkno
))
82 *bufp
= _hash_getbuf(rel
, blkno
, HASH_READ
, LH_OVERFLOW_PAGE
);
83 *pagep
= BufferGetPage(*bufp
);
84 *opaquep
= (HashPageOpaque
) PageGetSpecialPointer(*pagep
);
89 * Advance to previous page in a bucket, if any.
92 _hash_readprev(Relation rel
,
93 Buffer
*bufp
, Page
*pagep
, HashPageOpaque
*opaquep
)
97 blkno
= (*opaquep
)->hasho_prevblkno
;
98 _hash_relbuf(rel
, *bufp
);
99 *bufp
= InvalidBuffer
;
100 /* check for interrupts while we're not holding any buffer lock */
101 CHECK_FOR_INTERRUPTS();
102 if (BlockNumberIsValid(blkno
))
104 *bufp
= _hash_getbuf(rel
, blkno
, HASH_READ
,
105 LH_BUCKET_PAGE
| LH_OVERFLOW_PAGE
);
106 *pagep
= BufferGetPage(*bufp
);
107 *opaquep
= (HashPageOpaque
) PageGetSpecialPointer(*pagep
);
112 * _hash_first() -- Find the first item in a scan.
114 * Find the first item in the index that
115 * satisfies the qualification associated with the scan descriptor. On
116 * success, the page containing the current index tuple is read locked
117 * and pinned, and the scan's opaque data entry is updated to
118 * include the buffer.
121 _hash_first(IndexScanDesc scan
, ScanDirection dir
)
123 Relation rel
= scan
->indexRelation
;
124 HashScanOpaque so
= (HashScanOpaque
) scan
->opaque
;
132 HashPageOpaque opaque
;
138 pgstat_count_index_scan(rel
);
140 current
= &(so
->hashso_curpos
);
141 ItemPointerSetInvalid(current
);
144 * We do not support hash scans with no index qualification, because we
145 * would have to read the whole index rather than just one bucket. That
146 * creates a whole raft of problems, since we haven't got a practical way
147 * to lock all the buckets against splits or compactions.
149 if (scan
->numberOfKeys
< 1)
151 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
152 errmsg("hash indexes do not support whole-index scans")));
154 /* There may be more than one index qual, but we hash only the first */
155 cur
= &scan
->keyData
[0];
157 /* We support only single-column hash indexes */
158 Assert(cur
->sk_attno
== 1);
159 /* And there's only one operator strategy, too */
160 Assert(cur
->sk_strategy
== HTEqualStrategyNumber
);
163 * If the constant in the index qual is NULL, assume it cannot match any
164 * items in the index.
166 if (cur
->sk_flags
& SK_ISNULL
)
170 * Okay to compute the hash key. We want to do this before acquiring any
171 * locks, in case a user-defined hash function happens to be slow.
173 * If scankey operator is not a cross-type comparison, we can use the
174 * cached hash function; otherwise gotta look it up in the catalogs.
176 * We support the convention that sk_subtype == InvalidOid means the
177 * opclass input type; this is a hack to simplify life for ScanKeyInit().
179 if (cur
->sk_subtype
== rel
->rd_opcintype
[0] ||
180 cur
->sk_subtype
== InvalidOid
)
181 hashkey
= _hash_datum2hashkey(rel
, cur
->sk_argument
);
183 hashkey
= _hash_datum2hashkey_type(rel
, cur
->sk_argument
,
186 so
->hashso_sk_hash
= hashkey
;
189 * Acquire shared split lock so we can compute the target bucket safely
192 _hash_getlock(rel
, 0, HASH_SHARE
);
194 /* Read the metapage */
195 metabuf
= _hash_getbuf(rel
, HASH_METAPAGE
, HASH_READ
, LH_META_PAGE
);
196 metap
= HashPageGetMeta(BufferGetPage(metabuf
));
199 * Compute the target bucket number, and convert to block number.
201 bucket
= _hash_hashkey2bucket(hashkey
,
202 metap
->hashm_maxbucket
,
203 metap
->hashm_highmask
,
204 metap
->hashm_lowmask
);
206 blkno
= BUCKET_TO_BLKNO(metap
, bucket
);
208 /* done with the metapage */
209 _hash_relbuf(rel
, metabuf
);
212 * Acquire share lock on target bucket; then we can release split lock.
214 _hash_getlock(rel
, blkno
, HASH_SHARE
);
216 _hash_droplock(rel
, 0, HASH_SHARE
);
218 /* Update scan opaque state to show we have lock on the bucket */
219 so
->hashso_bucket
= bucket
;
220 so
->hashso_bucket_valid
= true;
221 so
->hashso_bucket_blkno
= blkno
;
223 /* Fetch the primary bucket page for the bucket */
224 buf
= _hash_getbuf(rel
, blkno
, HASH_READ
, LH_BUCKET_PAGE
);
225 page
= BufferGetPage(buf
);
226 opaque
= (HashPageOpaque
) PageGetSpecialPointer(page
);
227 Assert(opaque
->hasho_bucket
== bucket
);
229 /* If a backwards scan is requested, move to the end of the chain */
230 if (ScanDirectionIsBackward(dir
))
232 while (BlockNumberIsValid(opaque
->hasho_nextblkno
))
233 _hash_readnext(rel
, &buf
, &page
, &opaque
);
236 /* Now find the first tuple satisfying the qualification */
237 if (!_hash_step(scan
, &buf
, dir
))
240 /* if we're here, _hash_step found a valid tuple */
241 offnum
= ItemPointerGetOffsetNumber(current
);
242 _hash_checkpage(rel
, buf
, LH_BUCKET_PAGE
| LH_OVERFLOW_PAGE
);
243 page
= BufferGetPage(buf
);
244 itup
= (IndexTuple
) PageGetItem(page
, PageGetItemId(page
, offnum
));
245 scan
->xs_ctup
.t_self
= itup
->t_tid
;
251 * _hash_step() -- step to the next valid item in a scan in the bucket.
253 * If no valid record exists in the requested direction, return
254 * false. Else, return true and set the hashso_curpos for the
255 * scan to the right thing.
257 * 'bufP' points to the current buffer, which is pinned and read-locked.
258 * On success exit, we have pin and read-lock on whichever page
259 * contains the right item; on failure, we have released all buffers.
262 _hash_step(IndexScanDesc scan
, Buffer
*bufP
, ScanDirection dir
)
264 Relation rel
= scan
->indexRelation
;
265 HashScanOpaque so
= (HashScanOpaque
) scan
->opaque
;
269 HashPageOpaque opaque
;
275 current
= &(so
->hashso_curpos
);
278 _hash_checkpage(rel
, buf
, LH_BUCKET_PAGE
| LH_OVERFLOW_PAGE
);
279 page
= BufferGetPage(buf
);
280 opaque
= (HashPageOpaque
) PageGetSpecialPointer(page
);
283 * If _hash_step is called from _hash_first, current will not be valid, so
284 * we can't dereference it. However, in that case, we presumably want to
285 * start at the beginning/end of the page...
287 maxoff
= PageGetMaxOffsetNumber(page
);
288 if (ItemPointerIsValid(current
))
289 offnum
= ItemPointerGetOffsetNumber(current
);
291 offnum
= InvalidOffsetNumber
;
294 * 'offnum' now points to the last tuple we examined (if any).
296 * continue to step through tuples until: 1) we get to the end of the
297 * bucket chain or 2) we find a valid tuple.
303 case ForwardScanDirection
:
304 if (offnum
!= InvalidOffsetNumber
)
305 offnum
= OffsetNumberNext(offnum
); /* move forward */
308 /* new page, locate starting position by binary search */
309 offnum
= _hash_binsearch(page
, so
->hashso_sk_hash
);
315 * check if we're still in the range of items with the
318 if (offnum
<= maxoff
)
320 Assert(offnum
>= FirstOffsetNumber
);
321 itup
= (IndexTuple
) PageGetItem(page
, PageGetItemId(page
, offnum
));
322 if (so
->hashso_sk_hash
== _hash_get_indextuple_hashkey(itup
))
323 break; /* yes, so exit for-loop */
327 * ran off the end of this page, try the next
329 _hash_readnext(rel
, &buf
, &page
, &opaque
);
330 if (BufferIsValid(buf
))
332 maxoff
= PageGetMaxOffsetNumber(page
);
333 offnum
= _hash_binsearch(page
, so
->hashso_sk_hash
);
339 break; /* exit for-loop */
344 case BackwardScanDirection
:
345 if (offnum
!= InvalidOffsetNumber
)
346 offnum
= OffsetNumberPrev(offnum
); /* move back */
349 /* new page, locate starting position by binary search */
350 offnum
= _hash_binsearch_last(page
, so
->hashso_sk_hash
);
356 * check if we're still in the range of items with the
359 if (offnum
>= FirstOffsetNumber
)
361 Assert(offnum
<= maxoff
);
362 itup
= (IndexTuple
) PageGetItem(page
, PageGetItemId(page
, offnum
));
363 if (so
->hashso_sk_hash
== _hash_get_indextuple_hashkey(itup
))
364 break; /* yes, so exit for-loop */
368 * ran off the end of this page, try the next
370 _hash_readprev(rel
, &buf
, &page
, &opaque
);
371 if (BufferIsValid(buf
))
373 maxoff
= PageGetMaxOffsetNumber(page
);
374 offnum
= _hash_binsearch_last(page
, so
->hashso_sk_hash
);
380 break; /* exit for-loop */
386 /* NoMovementScanDirection */
387 /* this should not be reached */
394 /* we ran off the end of the bucket without finding a match */
395 *bufP
= so
->hashso_curbuf
= InvalidBuffer
;
396 ItemPointerSetInvalid(current
);
400 /* check the tuple quals, loop around if not met */
401 } while (!_hash_checkqual(scan
, itup
));
403 /* if we made it to here, we've found a valid tuple */
404 blkno
= BufferGetBlockNumber(buf
);
405 *bufP
= so
->hashso_curbuf
= buf
;
406 ItemPointerSet(current
, blkno
, offnum
);