/* src/backend/access/hash/hashinsert.c */
/*-------------------------------------------------------------------------
 *
 * hashinsert.c
 *	  Item insertion in hash tables for Postgres.
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
16 #include "postgres.h"
18 #include "access/hash.h"
19 #include "storage/bufmgr.h"
20 #include "utils/rel.h"
23 static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf,
24 Size itemsize, IndexTuple itup);
28 * _hash_doinsert() -- Handle insertion of a single index tuple.
30 * This routine is called by the public interface routines, hashbuild
31 * and hashinsert. By here, itup is completely filled in.
33 void
34 _hash_doinsert(Relation rel, IndexTuple itup)
36 Buffer buf;
37 Buffer metabuf;
38 HashMetaPage metap;
39 BlockNumber blkno;
40 Page page;
41 HashPageOpaque pageopaque;
42 Size itemsz;
43 bool do_expand;
44 uint32 hashkey;
45 Bucket bucket;
48 * Get the hash key for the item (it's stored in the index tuple itself).
50 hashkey = _hash_get_indextuple_hashkey(itup);
52 /* compute item size too */
53 itemsz = IndexTupleDSize(*itup);
54 itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we
55 * need to be consistent */
58 * Acquire shared split lock so we can compute the target bucket safely
59 * (see README).
61 _hash_getlock(rel, 0, HASH_SHARE);
63 /* Read the metapage */
64 metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
65 metap = HashPageGetMeta(BufferGetPage(metabuf));
68 * Check whether the item can fit on a hash page at all. (Eventually, we
69 * ought to try to apply TOAST methods if not.) Note that at this point,
70 * itemsz doesn't include the ItemId.
72 * XXX this is useless code if we are only storing hash keys.
74 if (itemsz > HashMaxItemSize((Page) metap))
75 ereport(ERROR,
76 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
77 errmsg("index row size %lu exceeds hash maximum %lu",
78 (unsigned long) itemsz,
79 (unsigned long) HashMaxItemSize((Page) metap)),
80 errhint("Values larger than a buffer page cannot be indexed.")));
83 * Compute the target bucket number, and convert to block number.
85 bucket = _hash_hashkey2bucket(hashkey,
86 metap->hashm_maxbucket,
87 metap->hashm_highmask,
88 metap->hashm_lowmask);
90 blkno = BUCKET_TO_BLKNO(metap, bucket);
92 /* release lock on metapage, but keep pin since we'll need it again */
93 _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
96 * Acquire share lock on target bucket; then we can release split lock.
98 _hash_getlock(rel, blkno, HASH_SHARE);
100 _hash_droplock(rel, 0, HASH_SHARE);
102 /* Fetch the primary bucket page for the bucket */
103 buf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BUCKET_PAGE);
104 page = BufferGetPage(buf);
105 pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
106 Assert(pageopaque->hasho_bucket == bucket);
108 /* Do the insertion */
109 while (PageGetFreeSpace(page) < itemsz)
112 * no space on this page; check for an overflow page
114 BlockNumber nextblkno = pageopaque->hasho_nextblkno;
116 if (BlockNumberIsValid(nextblkno))
119 * ovfl page exists; go get it. if it doesn't have room, we'll
120 * find out next pass through the loop test above.
122 _hash_relbuf(rel, buf);
123 buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
124 page = BufferGetPage(buf);
126 else
129 * we're at the end of the bucket chain and we haven't found a
130 * page with enough room. allocate a new overflow page.
133 /* release our write lock without modifying buffer */
134 _hash_chgbufaccess(rel, buf, HASH_READ, HASH_NOLOCK);
136 /* chain to a new overflow page */
137 buf = _hash_addovflpage(rel, metabuf, buf);
138 page = BufferGetPage(buf);
140 /* should fit now, given test above */
141 Assert(PageGetFreeSpace(page) >= itemsz);
143 pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
144 Assert(pageopaque->hasho_flag == LH_OVERFLOW_PAGE);
145 Assert(pageopaque->hasho_bucket == bucket);
148 /* found page with enough space, so add the item here */
149 (void) _hash_pgaddtup(rel, buf, itemsz, itup);
151 /* write and release the modified page */
152 _hash_wrtbuf(rel, buf);
154 /* We can drop the bucket lock now */
155 _hash_droplock(rel, blkno, HASH_SHARE);
158 * Write-lock the metapage so we can increment the tuple count. After
159 * incrementing it, check to see if it's time for a split.
161 _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
163 metap->hashm_ntuples += 1;
165 /* Make sure this stays in sync with _hash_expandtable() */
166 do_expand = metap->hashm_ntuples >
167 (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
169 /* Write out the metapage and drop lock, but keep pin */
170 _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
172 /* Attempt to split if a split is needed */
173 if (do_expand)
174 _hash_expandtable(rel, metabuf);
176 /* Finally drop our pin on the metapage */
177 _hash_dropbuf(rel, metabuf);
181 * _hash_pgaddtup() -- add a tuple to a particular page in the index.
183 * This routine adds the tuple to the page as requested; it does
184 * not write out the page. It is an error to call pgaddtup() without
185 * a write lock and pin.
187 static OffsetNumber
188 _hash_pgaddtup(Relation rel,
189 Buffer buf,
190 Size itemsize,
191 IndexTuple itup)
193 OffsetNumber itup_off;
194 Page page;
195 uint32 hashkey;
197 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
198 page = BufferGetPage(buf);
200 /* Find where to insert the tuple (preserving page's hashkey ordering) */
201 hashkey = _hash_get_indextuple_hashkey(itup);
202 itup_off = _hash_binsearch(page, hashkey);
204 if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
205 == InvalidOffsetNumber)
206 elog(ERROR, "failed to add index item to \"%s\"",
207 RelationGetRelationName(rel));
209 return itup_off;