Fix pg_dump bug in the database-level collation patch. "datcollate" and
[PostgreSQL.git] / src / backend / executor / execGrouping.c
blob715b1bdb0e018c73ce31efa4c4f69ac152260df7
1 /*-------------------------------------------------------------------------
3 * execGrouping.c
4 * executor utility routines for grouping, hashing, and aggregation
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * $PostgreSQL$
13 *-------------------------------------------------------------------------
15 #include "postgres.h"
17 #include "executor/executor.h"
18 #include "parser/parse_oper.h"
19 #include "utils/lsyscache.h"
20 #include "utils/memutils.h"
21 #include "utils/syscache.h"
24 static TupleHashTable CurTupleHashTable = NULL;
26 static uint32 TupleHashTableHash(const void *key, Size keysize);
27 static int TupleHashTableMatch(const void *key1, const void *key2,
28 Size keysize);
31 /*****************************************************************************
32 * Utility routines for grouping tuples together
33 *****************************************************************************/
36 * execTuplesMatch
37 * Return true if two tuples match in all the indicated fields.
39 * This actually implements SQL's notion of "not distinct". Two nulls
40 * match, a null and a not-null don't match.
42 * slot1, slot2: the tuples to compare (must have same columns!)
43 * numCols: the number of attributes to be examined
44 * matchColIdx: array of attribute column numbers
45 * eqFunctions: array of fmgr lookup info for the equality functions to use
46 * evalContext: short-term memory context for executing the functions
48 * NB: evalContext is reset each time!
50 bool
51 execTuplesMatch(TupleTableSlot *slot1,
52 TupleTableSlot *slot2,
53 int numCols,
54 AttrNumber *matchColIdx,
55 FmgrInfo *eqfunctions,
56 MemoryContext evalContext)
58 MemoryContext oldContext;
59 bool result;
60 int i;
62 /* Reset and switch into the temp context. */
63 MemoryContextReset(evalContext);
64 oldContext = MemoryContextSwitchTo(evalContext);
67 * We cannot report a match without checking all the fields, but we can
68 * report a non-match as soon as we find unequal fields. So, start
69 * comparing at the last field (least significant sort key). That's the
70 * most likely to be different if we are dealing with sorted input.
72 result = true;
74 for (i = numCols; --i >= 0;)
76 AttrNumber att = matchColIdx[i];
77 Datum attr1,
78 attr2;
79 bool isNull1,
80 isNull2;
82 attr1 = slot_getattr(slot1, att, &isNull1);
84 attr2 = slot_getattr(slot2, att, &isNull2);
86 if (isNull1 != isNull2)
88 result = false; /* one null and one not; they aren't equal */
89 break;
92 if (isNull1)
93 continue; /* both are null, treat as equal */
95 /* Apply the type-specific equality function */
97 if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
98 attr1, attr2)))
100 result = false; /* they aren't equal */
101 break;
105 MemoryContextSwitchTo(oldContext);
107 return result;
111 * execTuplesUnequal
112 * Return true if two tuples are definitely unequal in the indicated
113 * fields.
115 * Nulls are neither equal nor unequal to anything else. A true result
116 * is obtained only if there are non-null fields that compare not-equal.
118 * Parameters are identical to execTuplesMatch.
120 bool
121 execTuplesUnequal(TupleTableSlot *slot1,
122 TupleTableSlot *slot2,
123 int numCols,
124 AttrNumber *matchColIdx,
125 FmgrInfo *eqfunctions,
126 MemoryContext evalContext)
128 MemoryContext oldContext;
129 bool result;
130 int i;
132 /* Reset and switch into the temp context. */
133 MemoryContextReset(evalContext);
134 oldContext = MemoryContextSwitchTo(evalContext);
137 * We cannot report a match without checking all the fields, but we can
138 * report a non-match as soon as we find unequal fields. So, start
139 * comparing at the last field (least significant sort key). That's the
140 * most likely to be different if we are dealing with sorted input.
142 result = false;
144 for (i = numCols; --i >= 0;)
146 AttrNumber att = matchColIdx[i];
147 Datum attr1,
148 attr2;
149 bool isNull1,
150 isNull2;
152 attr1 = slot_getattr(slot1, att, &isNull1);
154 if (isNull1)
155 continue; /* can't prove anything here */
157 attr2 = slot_getattr(slot2, att, &isNull2);
159 if (isNull2)
160 continue; /* can't prove anything here */
162 /* Apply the type-specific equality function */
164 if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
165 attr1, attr2)))
167 result = true; /* they are unequal */
168 break;
172 MemoryContextSwitchTo(oldContext);
174 return result;
179 * execTuplesMatchPrepare
180 * Look up the equality functions needed for execTuplesMatch or
181 * execTuplesUnequal, given an array of equality operator OIDs.
183 * The result is a palloc'd array.
185 FmgrInfo *
186 execTuplesMatchPrepare(int numCols,
187 Oid *eqOperators)
189 FmgrInfo *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
190 int i;
192 for (i = 0; i < numCols; i++)
194 Oid eq_opr = eqOperators[i];
195 Oid eq_function;
197 eq_function = get_opcode(eq_opr);
198 fmgr_info(eq_function, &eqFunctions[i]);
201 return eqFunctions;
205 * execTuplesHashPrepare
206 * Look up the equality and hashing functions needed for a TupleHashTable.
208 * This is similar to execTuplesMatchPrepare, but we also need to find the
209 * hash functions associated with the equality operators. *eqFunctions and
210 * *hashFunctions receive the palloc'd result arrays.
212 * Note: we expect that the given operators are not cross-type comparisons.
214 void
215 execTuplesHashPrepare(int numCols,
216 Oid *eqOperators,
217 FmgrInfo **eqFunctions,
218 FmgrInfo **hashFunctions)
220 int i;
222 *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
223 *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
225 for (i = 0; i < numCols; i++)
227 Oid eq_opr = eqOperators[i];
228 Oid eq_function;
229 Oid left_hash_function;
230 Oid right_hash_function;
232 eq_function = get_opcode(eq_opr);
233 if (!get_op_hash_functions(eq_opr,
234 &left_hash_function, &right_hash_function))
235 elog(ERROR, "could not find hash function for hash operator %u",
236 eq_opr);
237 /* We're not supporting cross-type cases here */
238 Assert(left_hash_function == right_hash_function);
239 fmgr_info(eq_function, &(*eqFunctions)[i]);
240 fmgr_info(right_hash_function, &(*hashFunctions)[i]);
245 /*****************************************************************************
246 * Utility routines for all-in-memory hash tables
248 * These routines build hash tables for grouping tuples together (eg, for
249 * hash aggregation). There is one entry for each not-distinct set of tuples
250 * presented.
251 *****************************************************************************/
254 * Construct an empty TupleHashTable
256 * numCols, keyColIdx: identify the tuple fields to use as lookup key
257 * eqfunctions: equality comparison functions to use
258 * hashfunctions: datatype-specific hashing functions to use
259 * nbuckets: initial estimate of hashtable size
260 * entrysize: size of each entry (at least sizeof(TupleHashEntryData))
261 * tablecxt: memory context in which to store table and table entries
262 * tempcxt: short-lived context for evaluation hash and comparison functions
264 * The function arrays may be made with execTuplesHashPrepare(). Note they
265 * are not cross-type functions, but expect to see the table datatype(s)
266 * on both sides.
268 * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
269 * storage that will live as long as the hashtable does.
271 TupleHashTable
272 BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
273 FmgrInfo *eqfunctions,
274 FmgrInfo *hashfunctions,
275 int nbuckets, Size entrysize,
276 MemoryContext tablecxt, MemoryContext tempcxt)
278 TupleHashTable hashtable;
279 HASHCTL hash_ctl;
281 Assert(nbuckets > 0);
282 Assert(entrysize >= sizeof(TupleHashEntryData));
284 hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt,
285 sizeof(TupleHashTableData));
287 hashtable->numCols = numCols;
288 hashtable->keyColIdx = keyColIdx;
289 hashtable->tab_hash_funcs = hashfunctions;
290 hashtable->tab_eq_funcs = eqfunctions;
291 hashtable->tablecxt = tablecxt;
292 hashtable->tempcxt = tempcxt;
293 hashtable->entrysize = entrysize;
294 hashtable->tableslot = NULL; /* will be made on first lookup */
295 hashtable->inputslot = NULL;
296 hashtable->in_hash_funcs = NULL;
297 hashtable->cur_eq_funcs = NULL;
299 MemSet(&hash_ctl, 0, sizeof(hash_ctl));
300 hash_ctl.keysize = sizeof(TupleHashEntryData);
301 hash_ctl.entrysize = entrysize;
302 hash_ctl.hash = TupleHashTableHash;
303 hash_ctl.match = TupleHashTableMatch;
304 hash_ctl.hcxt = tablecxt;
305 hashtable->hashtab = hash_create("TupleHashTable", (long) nbuckets,
306 &hash_ctl,
307 HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
309 return hashtable;
313 * Find or create a hashtable entry for the tuple group containing the
314 * given tuple. The tuple must be the same type as the hashtable entries.
316 * If isnew is NULL, we do not create new entries; we return NULL if no
317 * match is found.
319 * If isnew isn't NULL, then a new entry is created if no existing entry
320 * matches. On return, *isnew is true if the entry is newly created,
321 * false if it existed already. Any extra space in a new entry has been
322 * zeroed.
324 TupleHashEntry
325 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
326 bool *isnew)
328 TupleHashEntry entry;
329 MemoryContext oldContext;
330 TupleHashTable saveCurHT;
331 TupleHashEntryData dummy;
332 bool found;
334 /* If first time through, clone the input slot to make table slot */
335 if (hashtable->tableslot == NULL)
337 TupleDesc tupdesc;
339 oldContext = MemoryContextSwitchTo(hashtable->tablecxt);
342 * We copy the input tuple descriptor just for safety --- we assume
343 * all input tuples will have equivalent descriptors.
345 tupdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
346 hashtable->tableslot = MakeSingleTupleTableSlot(tupdesc);
347 MemoryContextSwitchTo(oldContext);
350 /* Need to run the hash functions in short-lived context */
351 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
354 * Set up data needed by hash and match functions
356 * We save and restore CurTupleHashTable just in case someone manages to
357 * invoke this code re-entrantly.
359 hashtable->inputslot = slot;
360 hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
361 hashtable->cur_eq_funcs = hashtable->tab_eq_funcs;
363 saveCurHT = CurTupleHashTable;
364 CurTupleHashTable = hashtable;
366 /* Search the hash table */
367 dummy.firstTuple = NULL; /* flag to reference inputslot */
368 entry = (TupleHashEntry) hash_search(hashtable->hashtab,
369 &dummy,
370 isnew ? HASH_ENTER : HASH_FIND,
371 &found);
373 if (isnew)
375 if (found)
377 /* found pre-existing entry */
378 *isnew = false;
380 else
383 * created new entry
385 * Zero any caller-requested space in the entry. (This zaps the
386 * "key data" dynahash.c copied into the new entry, but we don't
387 * care since we're about to overwrite it anyway.)
389 MemSet(entry, 0, hashtable->entrysize);
391 /* Copy the first tuple into the table context */
392 MemoryContextSwitchTo(hashtable->tablecxt);
393 entry->firstTuple = ExecCopySlotMinimalTuple(slot);
395 *isnew = true;
399 CurTupleHashTable = saveCurHT;
401 MemoryContextSwitchTo(oldContext);
403 return entry;
407 * Search for a hashtable entry matching the given tuple. No entry is
408 * created if there's not a match. This is similar to the non-creating
409 * case of LookupTupleHashEntry, except that it supports cross-type
410 * comparisons, in which the given tuple is not of the same type as the
411 * table entries. The caller must provide the hash functions to use for
412 * the input tuple, as well as the equality functions, since these may be
413 * different from the table's internal functions.
415 TupleHashEntry
416 FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
417 FmgrInfo *eqfunctions,
418 FmgrInfo *hashfunctions)
420 TupleHashEntry entry;
421 MemoryContext oldContext;
422 TupleHashTable saveCurHT;
423 TupleHashEntryData dummy;
425 /* Need to run the hash functions in short-lived context */
426 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
429 * Set up data needed by hash and match functions
431 * We save and restore CurTupleHashTable just in case someone manages to
432 * invoke this code re-entrantly.
434 hashtable->inputslot = slot;
435 hashtable->in_hash_funcs = hashfunctions;
436 hashtable->cur_eq_funcs = eqfunctions;
438 saveCurHT = CurTupleHashTable;
439 CurTupleHashTable = hashtable;
441 /* Search the hash table */
442 dummy.firstTuple = NULL; /* flag to reference inputslot */
443 entry = (TupleHashEntry) hash_search(hashtable->hashtab,
444 &dummy,
445 HASH_FIND,
446 NULL);
448 CurTupleHashTable = saveCurHT;
450 MemoryContextSwitchTo(oldContext);
452 return entry;
456 * Compute the hash value for a tuple
458 * The passed-in key is a pointer to TupleHashEntryData. In an actual hash
459 * table entry, the firstTuple field points to a tuple (in MinimalTuple
460 * format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a
461 * NULL firstTuple field --- that cues us to look at the inputslot instead.
462 * This convention avoids the need to materialize virtual input tuples unless
463 * they actually need to get copied into the table.
465 * CurTupleHashTable must be set before calling this, since dynahash.c
466 * doesn't provide any API that would let us get at the hashtable otherwise.
468 * Also, the caller must select an appropriate memory context for running
469 * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
471 static uint32
472 TupleHashTableHash(const void *key, Size keysize)
474 MinimalTuple tuple = ((const TupleHashEntryData *) key)->firstTuple;
475 TupleTableSlot *slot;
476 TupleHashTable hashtable = CurTupleHashTable;
477 int numCols = hashtable->numCols;
478 AttrNumber *keyColIdx = hashtable->keyColIdx;
479 FmgrInfo *hashfunctions;
480 uint32 hashkey = 0;
481 int i;
483 if (tuple == NULL)
485 /* Process the current input tuple for the table */
486 slot = hashtable->inputslot;
487 hashfunctions = hashtable->in_hash_funcs;
489 else
491 /* Process a tuple already stored in the table */
492 /* (this case never actually occurs in current dynahash.c code) */
493 slot = hashtable->tableslot;
494 ExecStoreMinimalTuple(tuple, slot, false);
495 hashfunctions = hashtable->tab_hash_funcs;
498 for (i = 0; i < numCols; i++)
500 AttrNumber att = keyColIdx[i];
501 Datum attr;
502 bool isNull;
504 /* rotate hashkey left 1 bit at each step */
505 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
507 attr = slot_getattr(slot, att, &isNull);
509 if (!isNull) /* treat nulls as having hash key 0 */
511 uint32 hkey;
513 hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
514 attr));
515 hashkey ^= hkey;
519 return hashkey;
523 * See whether two tuples (presumably of the same hash value) match
525 * As above, the passed pointers are pointers to TupleHashEntryData.
527 * CurTupleHashTable must be set before calling this, since dynahash.c
528 * doesn't provide any API that would let us get at the hashtable otherwise.
530 * Also, the caller must select an appropriate memory context for running
531 * the compare functions. (dynahash.c doesn't change CurrentMemoryContext.)
533 static int
534 TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
536 MinimalTuple tuple1 = ((const TupleHashEntryData *) key1)->firstTuple;
538 #ifdef USE_ASSERT_CHECKING
539 MinimalTuple tuple2 = ((const TupleHashEntryData *) key2)->firstTuple;
540 #endif
541 TupleTableSlot *slot1;
542 TupleTableSlot *slot2;
543 TupleHashTable hashtable = CurTupleHashTable;
546 * We assume that dynahash.c will only ever call us with the first
547 * argument being an actual table entry, and the second argument being
548 * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
549 * could be supported too, but is not currently used by dynahash.c.
551 Assert(tuple1 != NULL);
552 slot1 = hashtable->tableslot;
553 ExecStoreMinimalTuple(tuple1, slot1, false);
554 Assert(tuple2 == NULL);
555 slot2 = hashtable->inputslot;
557 /* For crosstype comparisons, the inputslot must be first */
558 if (execTuplesMatch(slot2,
559 slot1,
560 hashtable->numCols,
561 hashtable->keyColIdx,
562 hashtable->cur_eq_funcs,
563 hashtable->tempcxt))
564 return 0;
565 else
566 return 1;