src/backend/access/hash/hashsort.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * hashsort.c
   4  *              Sort tuples for insertion into a new hash index.
   5  *
   6  * When building a very large hash index, we pre-sort the tuples by bucket
   7  * number to improve locality of access to the index, and thereby avoid
   8  * thrashing.  We use tuplesort.c to sort the given index tuples into order.
   9  *
  10  * Note: if the number of rows in the table has been underestimated,
  11  * bucket splits may occur during the index build.      In that case we'd
  12  * be inserting into two or more buckets for each possible masked-off
  13  * hash code value.  That's no big problem though, since we'll still have
  14  * plenty of locality of access.
  15  *
  16  *
  17  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  18  * Portions Copyright (c) 1994, Regents of the University of California
  19  *
  20  * IDENTIFICATION
  21  *        $PostgreSQL$
  22  *
  23  *-------------------------------------------------------------------------
  24  */
  25
  26 #include "postgres.h"
  27
  28 #include "access/hash.h"
  29 #include "miscadmin.h"
  30 #include "utils/tuplesort.h"
  31
  32
  33 /*
  34  * Status record for spooling/sorting phase.
  35  */
  36 struct HSpool
  37 {
  38         Tuplesortstate *sortstate;      /* state data for tuplesort.c */
  39         Relation        index;
  40 };
  41
  42
  43 /*
  44  * create and initialize a spool structure
  45  */
  46 HSpool *
  47 _h_spoolinit(Relation index, uint32 num_buckets)
  48 {
  49         HSpool     *hspool = (HSpool *) palloc0(sizeof(HSpool));
  50         uint32          hash_mask;
  51
  52         hspool->index = index;
  53
  54         /*
  55          * Determine the bitmask for hash code values.  Since there are currently
  56          * num_buckets buckets in the index, the appropriate mask can be computed
  57          * as follows.
  58          *
  59          * Note: at present, the passed-in num_buckets is always a power of 2, so
  60          * we could just compute num_buckets - 1.  We prefer not to assume that
  61          * here, though.
  62          */
  63         hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
  64
  65         /*
  66          * We size the sort area as maintenance_work_mem rather than work_mem to
  67          * speed index creation.  This should be OK since a single backend can't
  68          * run multiple index creations in parallel.
  69          */
  70         hspool->sortstate = tuplesort_begin_index_hash(index,
  71                                                                                                    hash_mask,
  72                                                                                                    maintenance_work_mem,
  73                                                                                                    false);
  74
  75         return hspool;
  76 }
  77
  78 /*
  79  * clean up a spool structure and its substructures.
  80  */
  81 void
  82 _h_spooldestroy(HSpool *hspool)
  83 {
  84         tuplesort_end(hspool->sortstate);
  85         pfree(hspool);
  86 }
  87
  88 /*
  89  * spool an index entry into the sort file.
  90  */
  91 void
  92 _h_spool(IndexTuple itup, HSpool *hspool)
  93 {
  94         tuplesort_putindextuple(hspool->sortstate, itup);
  95 }
  96
  97 /*
  98  * given a spool loaded by successive calls to _h_spool,
  99  * create an entire index.
 100  */
 101 void
 102 _h_indexbuild(HSpool *hspool)
 103 {
 104         IndexTuple      itup;
 105         bool            should_free;
 106
 107         tuplesort_performsort(hspool->sortstate);
 108
 109         while ((itup = tuplesort_getindextuple(hspool->sortstate,
 110                                                                                    true, &should_free)) != NULL)
 111         {
 112                 _hash_doinsert(hspool->index, itup);
 113                 if (should_free)
 114                         pfree(itup);
 115         }
 116 }