src/backend/access/hash/hashsearch.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * hashsearch.c
   4  *        search code for postgres hash tables
   5  *
   6  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL$
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include "access/hash.h"
  18 #include "access/relscan.h"
  19 #include "miscadmin.h"
  20 #include "pgstat.h"
  21 #include "storage/bufmgr.h"
  22 #include "utils/rel.h"
  23
  24
  25 /*
  26  *      _hash_next() -- Get the next item in a scan.
  27  *
  28  *              On entry, we have a valid hashso_curpos in the scan, and a
  29  *              pin and read lock on the page that contains that item.
  30  *              We find the next item in the scan, if any.
  31  *              On success exit, we have the page containing the next item
  32  *              pinned and locked.
  33  */
  34 bool
  35 _hash_next(IndexScanDesc scan, ScanDirection dir)
  36 {
  37         Relation        rel = scan->indexRelation;
  38         HashScanOpaque so = (HashScanOpaque) scan->opaque;
  39         Buffer          buf;
  40         Page            page;
  41         OffsetNumber offnum;
  42         ItemPointer current;
  43         IndexTuple      itup;
  44
  45         /* we still have the buffer pinned and read-locked */
  46         buf = so->hashso_curbuf;
  47         Assert(BufferIsValid(buf));
  48
  49         /*
  50          * step to next valid tuple.
  51          */
  52         if (!_hash_step(scan, &buf, dir))
  53                 return false;
  54
  55         /* if we're here, _hash_step found a valid tuple */
  56         current = &(so->hashso_curpos);
  57         offnum = ItemPointerGetOffsetNumber(current);
  58         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
  59         page = BufferGetPage(buf);
  60         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
  61         scan->xs_ctup.t_self = itup->t_tid;
  62
  63         return true;
  64 }
  65
  66 /*
  67  * Advance to next page in a bucket, if any.
  68  */
  69 static void
  70 _hash_readnext(Relation rel,
  71                            Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
  72 {
  73         BlockNumber blkno;
  74
  75         blkno = (*opaquep)->hasho_nextblkno;
  76         _hash_relbuf(rel, *bufp);
  77         *bufp = InvalidBuffer;
  78         /* check for interrupts while we're not holding any buffer lock */
  79         CHECK_FOR_INTERRUPTS();
  80         if (BlockNumberIsValid(blkno))
  81         {
  82                 *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
  83                 *pagep = BufferGetPage(*bufp);
  84                 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
  85         }
  86 }
  87
  88 /*
  89  * Advance to previous page in a bucket, if any.
  90  */
  91 static void
  92 _hash_readprev(Relation rel,
  93                            Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
  94 {
  95         BlockNumber blkno;
  96
  97         blkno = (*opaquep)->hasho_prevblkno;
  98         _hash_relbuf(rel, *bufp);
  99         *bufp = InvalidBuffer;
 100         /* check for interrupts while we're not holding any buffer lock */
 101         CHECK_FOR_INTERRUPTS();
 102         if (BlockNumberIsValid(blkno))
 103         {
 104                 *bufp = _hash_getbuf(rel, blkno, HASH_READ,
 105                                                          LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 106                 *pagep = BufferGetPage(*bufp);
 107                 *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
 108         }
 109 }
 110
 111 /*
 112  *      _hash_first() -- Find the first item in a scan.
 113  *
 114  *              Find the first item in the index that
 115  *              satisfies the qualification associated with the scan descriptor. On
 116  *              success, the page containing the current index tuple is read locked
 117  *              and pinned, and the scan's opaque data entry is updated to
 118  *              include the buffer.
 119  */
 120 bool
 121 _hash_first(IndexScanDesc scan, ScanDirection dir)
 122 {
 123         Relation        rel = scan->indexRelation;
 124         HashScanOpaque so = (HashScanOpaque) scan->opaque;
 125         ScanKey         cur;
 126         uint32          hashkey;
 127         Bucket          bucket;
 128         BlockNumber blkno;
 129         Buffer          buf;
 130         Buffer          metabuf;
 131         Page            page;
 132         HashPageOpaque opaque;
 133         HashMetaPage metap;
 134         IndexTuple      itup;
 135         ItemPointer current;
 136         OffsetNumber offnum;
 137
 138         pgstat_count_index_scan(rel);
 139
 140         current = &(so->hashso_curpos);
 141         ItemPointerSetInvalid(current);
 142
 143         /*
 144          * We do not support hash scans with no index qualification, because we
 145          * would have to read the whole index rather than just one bucket. That
 146          * creates a whole raft of problems, since we haven't got a practical way
 147          * to lock all the buckets against splits or compactions.
 148          */
 149         if (scan->numberOfKeys < 1)
 150                 ereport(ERROR,
 151                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 152                                  errmsg("hash indexes do not support whole-index scans")));
 153
 154         /* There may be more than one index qual, but we hash only the first */
 155         cur = &scan->keyData[0];
 156
 157         /* We support only single-column hash indexes */
 158         Assert(cur->sk_attno == 1);
 159         /* And there's only one operator strategy, too */
 160         Assert(cur->sk_strategy == HTEqualStrategyNumber);
 161
 162         /*
 163          * If the constant in the index qual is NULL, assume it cannot match any
 164          * items in the index.
 165          */
 166         if (cur->sk_flags & SK_ISNULL)
 167                 return false;
 168
 169         /*
 170          * Okay to compute the hash key.  We want to do this before acquiring any
 171          * locks, in case a user-defined hash function happens to be slow.
 172          *
 173          * If scankey operator is not a cross-type comparison, we can use the
 174          * cached hash function; otherwise gotta look it up in the catalogs.
 175          *
 176          * We support the convention that sk_subtype == InvalidOid means the
 177          * opclass input type; this is a hack to simplify life for ScanKeyInit().
 178          */
 179         if (cur->sk_subtype == rel->rd_opcintype[0] ||
 180                 cur->sk_subtype == InvalidOid)
 181                 hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
 182         else
 183                 hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
 184                                                                                    cur->sk_subtype);
 185
 186         so->hashso_sk_hash = hashkey;
 187
 188         /*
 189          * Acquire shared split lock so we can compute the target bucket safely
 190          * (see README).
 191          */
 192         _hash_getlock(rel, 0, HASH_SHARE);
 193
 194         /* Read the metapage */
 195         metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
 196         metap = HashPageGetMeta(BufferGetPage(metabuf));
 197
 198         /*
 199          * Compute the target bucket number, and convert to block number.
 200          */
 201         bucket = _hash_hashkey2bucket(hashkey,
 202                                                                   metap->hashm_maxbucket,
 203                                                                   metap->hashm_highmask,
 204                                                                   metap->hashm_lowmask);
 205
 206         blkno = BUCKET_TO_BLKNO(metap, bucket);
 207
 208         /* done with the metapage */
 209         _hash_relbuf(rel, metabuf);
 210
 211         /*
 212          * Acquire share lock on target bucket; then we can release split lock.
 213          */
 214         _hash_getlock(rel, blkno, HASH_SHARE);
 215
 216         _hash_droplock(rel, 0, HASH_SHARE);
 217
 218         /* Update scan opaque state to show we have lock on the bucket */
 219         so->hashso_bucket = bucket;
 220         so->hashso_bucket_valid = true;
 221         so->hashso_bucket_blkno = blkno;
 222
 223         /* Fetch the primary bucket page for the bucket */
 224         buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
 225         page = BufferGetPage(buf);
 226         opaque = (HashPageOpaque) PageGetSpecialPointer(page);
 227         Assert(opaque->hasho_bucket == bucket);
 228
 229         /* If a backwards scan is requested, move to the end of the chain */
 230         if (ScanDirectionIsBackward(dir))
 231         {
 232                 while (BlockNumberIsValid(opaque->hasho_nextblkno))
 233                         _hash_readnext(rel, &buf, &page, &opaque);
 234         }
 235
 236         /* Now find the first tuple satisfying the qualification */
 237         if (!_hash_step(scan, &buf, dir))
 238                 return false;
 239
 240         /* if we're here, _hash_step found a valid tuple */
 241         offnum = ItemPointerGetOffsetNumber(current);
 242         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 243         page = BufferGetPage(buf);
 244         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
 245         scan->xs_ctup.t_self = itup->t_tid;
 246
 247         return true;
 248 }
 249
 250 /*
 251  *      _hash_step() -- step to the next valid item in a scan in the bucket.
 252  *
 253  *              If no valid record exists in the requested direction, return
 254  *              false.  Else, return true and set the hashso_curpos for the
 255  *              scan to the right thing.
 256  *
 257  *              'bufP' points to the current buffer, which is pinned and read-locked.
 258  *              On success exit, we have pin and read-lock on whichever page
 259  *              contains the right item; on failure, we have released all buffers.
 260  */
 261 bool
 262 _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
 263 {
 264         Relation        rel = scan->indexRelation;
 265         HashScanOpaque so = (HashScanOpaque) scan->opaque;
 266         ItemPointer current;
 267         Buffer          buf;
 268         Page            page;
 269         HashPageOpaque opaque;
 270         OffsetNumber maxoff;
 271         OffsetNumber offnum;
 272         BlockNumber blkno;
 273         IndexTuple      itup;
 274
 275         current = &(so->hashso_curpos);
 276
 277         buf = *bufP;
 278         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 279         page = BufferGetPage(buf);
 280         opaque = (HashPageOpaque) PageGetSpecialPointer(page);
 281
 282         /*
 283          * If _hash_step is called from _hash_first, current will not be valid, so
 284          * we can't dereference it.  However, in that case, we presumably want to
 285          * start at the beginning/end of the page...
 286          */
 287         maxoff = PageGetMaxOffsetNumber(page);
 288         if (ItemPointerIsValid(current))
 289                 offnum = ItemPointerGetOffsetNumber(current);
 290         else
 291                 offnum = InvalidOffsetNumber;
 292
 293         /*
 294          * 'offnum' now points to the last tuple we examined (if any).
 295          *
 296          * continue to step through tuples until: 1) we get to the end of the
 297          * bucket chain or 2) we find a valid tuple.
 298          */
 299         do
 300         {
 301                 switch (dir)
 302                 {
 303                         case ForwardScanDirection:
 304                                 if (offnum != InvalidOffsetNumber)
 305                                         offnum = OffsetNumberNext(offnum);      /* move forward */
 306                                 else
 307                                 {
 308                                         /* new page, locate starting position by binary search */
 309                                         offnum = _hash_binsearch(page, so->hashso_sk_hash);
 310                                 }
 311
 312                                 for (;;)
 313                                 {
 314                                         /*
 315                                          * check if we're still in the range of items with the
 316                                          * target hash key
 317                                          */
 318                                         if (offnum <= maxoff)
 319                                         {
 320                                                 Assert(offnum >= FirstOffsetNumber);
 321                                                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
 322                                                 if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
 323                                                         break;          /* yes, so exit for-loop */
 324                                         }
 325
 326                                         /*
 327                                          * ran off the end of this page, try the next
 328                                          */
 329                                         _hash_readnext(rel, &buf, &page, &opaque);
 330                                         if (BufferIsValid(buf))
 331                                         {
 332                                                 maxoff = PageGetMaxOffsetNumber(page);
 333                                                 offnum = _hash_binsearch(page, so->hashso_sk_hash);
 334                                         }
 335                                         else
 336                                         {
 337                                                 /* end of bucket */
 338                                                 itup = NULL;
 339                                                 break;  /* exit for-loop */
 340                                         }
 341                                 }
 342                                 break;
 343
 344                         case BackwardScanDirection:
 345                                 if (offnum != InvalidOffsetNumber)
 346                                         offnum = OffsetNumberPrev(offnum);      /* move back */
 347                                 else
 348                                 {
 349                                         /* new page, locate starting position by binary search */
 350                                         offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
 351                                 }
 352
 353                                 for (;;)
 354                                 {
 355                                         /*
 356                                          * check if we're still in the range of items with the
 357                                          * target hash key
 358                                          */
 359                                         if (offnum >= FirstOffsetNumber)
 360                                         {
 361                                                 Assert(offnum <= maxoff);
 362                                                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
 363                                                 if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup))
 364                                                         break;          /* yes, so exit for-loop */
 365                                         }
 366
 367                                         /*
 368                                          * ran off the end of this page, try the next
 369                                          */
 370                                         _hash_readprev(rel, &buf, &page, &opaque);
 371                                         if (BufferIsValid(buf))
 372                                         {
 373                                                 maxoff = PageGetMaxOffsetNumber(page);
 374                                                 offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
 375                                         }
 376                                         else
 377                                         {
 378                                                 /* end of bucket */
 379                                                 itup = NULL;
 380                                                 break;  /* exit for-loop */
 381                                         }
 382                                 }
 383                                 break;
 384
 385                         default:
 386                                 /* NoMovementScanDirection */
 387                                 /* this should not be reached */
 388                                 itup = NULL;
 389                                 break;
 390                 }
 391
 392                 if (itup == NULL)
 393                 {
 394                         /* we ran off the end of the bucket without finding a match */
 395                         *bufP = so->hashso_curbuf = InvalidBuffer;
 396                         ItemPointerSetInvalid(current);
 397                         return false;
 398                 }
 399
 400                 /* check the tuple quals, loop around if not met */
 401         } while (!_hash_checkqual(scan, itup));
 402
 403         /* if we made it to here, we've found a valid tuple */
 404         blkno = BufferGetBlockNumber(buf);
 405         *bufP = so->hashso_curbuf = buf;
 406         ItemPointerSet(current, blkno, offnum);
 407         return true;
 408 }