Query in SQL function still not schema-safe; add a couple
[PostgreSQL.git] / src / backend / access / hash / hashsort.c
blob10bf6a7b83b01f80c07a1a146bb053f963502b3c
1 /*-------------------------------------------------------------------------
3 * hashsort.c
4 * Sort tuples for insertion into a new hash index.
6 * When building a very large hash index, we pre-sort the tuples by bucket
7 * number to improve locality of access to the index, and thereby avoid
8 * thrashing. We use tuplesort.c to sort the given index tuples into order.
10 * Note: if the number of rows in the table has been underestimated,
11 * bucket splits may occur during the index build. In that case we'd
12 * be inserting into two or more buckets for each possible masked-off
13 * hash code value. That's no big problem though, since we'll still have
14 * plenty of locality of access.
17 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
18 * Portions Copyright (c) 1994, Regents of the University of California
20 * IDENTIFICATION
21 * $PostgreSQL$
23 *-------------------------------------------------------------------------
26 #include "postgres.h"
28 #include "access/hash.h"
29 #include "miscadmin.h"
30 #include "utils/tuplesort.h"
34 * Status record for spooling/sorting phase.
36 struct HSpool
38 Tuplesortstate *sortstate; /* state data for tuplesort.c */
39 Relation index;
44 * create and initialize a spool structure
46 HSpool *
47 _h_spoolinit(Relation index, uint32 num_buckets)
49 HSpool *hspool = (HSpool *) palloc0(sizeof(HSpool));
50 uint32 hash_mask;
52 hspool->index = index;
55 * Determine the bitmask for hash code values. Since there are currently
56 * num_buckets buckets in the index, the appropriate mask can be computed
57 * as follows.
59 * Note: at present, the passed-in num_buckets is always a power of 2, so
60 * we could just compute num_buckets - 1. We prefer not to assume that
61 * here, though.
63 hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
66 * We size the sort area as maintenance_work_mem rather than work_mem to
67 * speed index creation. This should be OK since a single backend can't
68 * run multiple index creations in parallel.
70 hspool->sortstate = tuplesort_begin_index_hash(index,
71 hash_mask,
72 maintenance_work_mem,
73 false);
75 return hspool;
79 * clean up a spool structure and its substructures.
81 void
82 _h_spooldestroy(HSpool *hspool)
84 tuplesort_end(hspool->sortstate);
85 pfree(hspool);
89 * spool an index entry into the sort file.
91 void
92 _h_spool(IndexTuple itup, HSpool *hspool)
94 tuplesort_putindextuple(hspool->sortstate, itup);
98 * given a spool loaded by successive calls to _h_spool,
99 * create an entire index.
101 void
102 _h_indexbuild(HSpool *hspool)
104 IndexTuple itup;
105 bool should_free;
107 tuplesort_performsort(hspool->sortstate);
109 while ((itup = tuplesort_getindextuple(hspool->sortstate,
110 true, &should_free)) != NULL)
112 _hash_doinsert(hspool->index, itup);
113 if (should_free)
114 pfree(itup);