1 /*-------------------------------------------------------------------------
4 * executor utility routines for grouping, hashing, and aggregation
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
13 *-------------------------------------------------------------------------
17 #include "executor/executor.h"
18 #include "parser/parse_oper.h"
19 #include "utils/lsyscache.h"
20 #include "utils/memutils.h"
21 #include "utils/syscache.h"
24 static TupleHashTable CurTupleHashTable
= NULL
;
26 static uint32
TupleHashTableHash(const void *key
, Size keysize
);
27 static int TupleHashTableMatch(const void *key1
, const void *key2
,
31 /*****************************************************************************
32 * Utility routines for grouping tuples together
33 *****************************************************************************/
37 * Return true if two tuples match in all the indicated fields.
39 * This actually implements SQL's notion of "not distinct". Two nulls
40 * match, a null and a not-null don't match.
42 * slot1, slot2: the tuples to compare (must have same columns!)
43 * numCols: the number of attributes to be examined
44 * matchColIdx: array of attribute column numbers
45 * eqFunctions: array of fmgr lookup info for the equality functions to use
46 * evalContext: short-term memory context for executing the functions
48 * NB: evalContext is reset each time!
51 execTuplesMatch(TupleTableSlot
*slot1
,
52 TupleTableSlot
*slot2
,
54 AttrNumber
*matchColIdx
,
55 FmgrInfo
*eqfunctions
,
56 MemoryContext evalContext
)
58 MemoryContext oldContext
;
62 /* Reset and switch into the temp context. */
63 MemoryContextReset(evalContext
);
64 oldContext
= MemoryContextSwitchTo(evalContext
);
67 * We cannot report a match without checking all the fields, but we can
68 * report a non-match as soon as we find unequal fields. So, start
69 * comparing at the last field (least significant sort key). That's the
70 * most likely to be different if we are dealing with sorted input.
74 for (i
= numCols
; --i
>= 0;)
76 AttrNumber att
= matchColIdx
[i
];
82 attr1
= slot_getattr(slot1
, att
, &isNull1
);
84 attr2
= slot_getattr(slot2
, att
, &isNull2
);
86 if (isNull1
!= isNull2
)
88 result
= false; /* one null and one not; they aren't equal */
93 continue; /* both are null, treat as equal */
95 /* Apply the type-specific equality function */
97 if (!DatumGetBool(FunctionCall2(&eqfunctions
[i
],
100 result
= false; /* they aren't equal */
105 MemoryContextSwitchTo(oldContext
);
112 * Return true if two tuples are definitely unequal in the indicated
115 * Nulls are neither equal nor unequal to anything else. A true result
116 * is obtained only if there are non-null fields that compare not-equal.
118 * Parameters are identical to execTuplesMatch.
121 execTuplesUnequal(TupleTableSlot
*slot1
,
122 TupleTableSlot
*slot2
,
124 AttrNumber
*matchColIdx
,
125 FmgrInfo
*eqfunctions
,
126 MemoryContext evalContext
)
128 MemoryContext oldContext
;
132 /* Reset and switch into the temp context. */
133 MemoryContextReset(evalContext
);
134 oldContext
= MemoryContextSwitchTo(evalContext
);
137 * We cannot report a match without checking all the fields, but we can
138 * report a non-match as soon as we find unequal fields. So, start
139 * comparing at the last field (least significant sort key). That's the
140 * most likely to be different if we are dealing with sorted input.
144 for (i
= numCols
; --i
>= 0;)
146 AttrNumber att
= matchColIdx
[i
];
152 attr1
= slot_getattr(slot1
, att
, &isNull1
);
155 continue; /* can't prove anything here */
157 attr2
= slot_getattr(slot2
, att
, &isNull2
);
160 continue; /* can't prove anything here */
162 /* Apply the type-specific equality function */
164 if (!DatumGetBool(FunctionCall2(&eqfunctions
[i
],
167 result
= true; /* they are unequal */
172 MemoryContextSwitchTo(oldContext
);
179 * execTuplesMatchPrepare
180 * Look up the equality functions needed for execTuplesMatch or
181 * execTuplesUnequal, given an array of equality operator OIDs.
183 * The result is a palloc'd array.
186 execTuplesMatchPrepare(int numCols
,
189 FmgrInfo
*eqFunctions
= (FmgrInfo
*) palloc(numCols
* sizeof(FmgrInfo
));
192 for (i
= 0; i
< numCols
; i
++)
194 Oid eq_opr
= eqOperators
[i
];
197 eq_function
= get_opcode(eq_opr
);
198 fmgr_info(eq_function
, &eqFunctions
[i
]);
205 * execTuplesHashPrepare
206 * Look up the equality and hashing functions needed for a TupleHashTable.
208 * This is similar to execTuplesMatchPrepare, but we also need to find the
209 * hash functions associated with the equality operators. *eqFunctions and
210 * *hashFunctions receive the palloc'd result arrays.
212 * Note: we expect that the given operators are not cross-type comparisons.
215 execTuplesHashPrepare(int numCols
,
217 FmgrInfo
**eqFunctions
,
218 FmgrInfo
**hashFunctions
)
222 *eqFunctions
= (FmgrInfo
*) palloc(numCols
* sizeof(FmgrInfo
));
223 *hashFunctions
= (FmgrInfo
*) palloc(numCols
* sizeof(FmgrInfo
));
225 for (i
= 0; i
< numCols
; i
++)
227 Oid eq_opr
= eqOperators
[i
];
229 Oid left_hash_function
;
230 Oid right_hash_function
;
232 eq_function
= get_opcode(eq_opr
);
233 if (!get_op_hash_functions(eq_opr
,
234 &left_hash_function
, &right_hash_function
))
235 elog(ERROR
, "could not find hash function for hash operator %u",
237 /* We're not supporting cross-type cases here */
238 Assert(left_hash_function
== right_hash_function
);
239 fmgr_info(eq_function
, &(*eqFunctions
)[i
]);
240 fmgr_info(right_hash_function
, &(*hashFunctions
)[i
]);
245 /*****************************************************************************
246 * Utility routines for all-in-memory hash tables
248 * These routines build hash tables for grouping tuples together (eg, for
249 * hash aggregation). There is one entry for each not-distinct set of tuples
251 *****************************************************************************/
254 * Construct an empty TupleHashTable
256 * numCols, keyColIdx: identify the tuple fields to use as lookup key
257 * eqfunctions: equality comparison functions to use
258 * hashfunctions: datatype-specific hashing functions to use
259 * nbuckets: initial estimate of hashtable size
260 * entrysize: size of each entry (at least sizeof(TupleHashEntryData))
261 * tablecxt: memory context in which to store table and table entries
262 * tempcxt: short-lived context for evaluation hash and comparison functions
264 * The function arrays may be made with execTuplesHashPrepare(). Note they
265 * are not cross-type functions, but expect to see the table datatype(s)
268 * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
269 * storage that will live as long as the hashtable does.
272 BuildTupleHashTable(int numCols
, AttrNumber
*keyColIdx
,
273 FmgrInfo
*eqfunctions
,
274 FmgrInfo
*hashfunctions
,
275 int nbuckets
, Size entrysize
,
276 MemoryContext tablecxt
, MemoryContext tempcxt
)
278 TupleHashTable hashtable
;
281 Assert(nbuckets
> 0);
282 Assert(entrysize
>= sizeof(TupleHashEntryData
));
284 hashtable
= (TupleHashTable
) MemoryContextAlloc(tablecxt
,
285 sizeof(TupleHashTableData
));
287 hashtable
->numCols
= numCols
;
288 hashtable
->keyColIdx
= keyColIdx
;
289 hashtable
->tab_hash_funcs
= hashfunctions
;
290 hashtable
->tab_eq_funcs
= eqfunctions
;
291 hashtable
->tablecxt
= tablecxt
;
292 hashtable
->tempcxt
= tempcxt
;
293 hashtable
->entrysize
= entrysize
;
294 hashtable
->tableslot
= NULL
; /* will be made on first lookup */
295 hashtable
->inputslot
= NULL
;
296 hashtable
->in_hash_funcs
= NULL
;
297 hashtable
->cur_eq_funcs
= NULL
;
299 MemSet(&hash_ctl
, 0, sizeof(hash_ctl
));
300 hash_ctl
.keysize
= sizeof(TupleHashEntryData
);
301 hash_ctl
.entrysize
= entrysize
;
302 hash_ctl
.hash
= TupleHashTableHash
;
303 hash_ctl
.match
= TupleHashTableMatch
;
304 hash_ctl
.hcxt
= tablecxt
;
305 hashtable
->hashtab
= hash_create("TupleHashTable", (long) nbuckets
,
307 HASH_ELEM
| HASH_FUNCTION
| HASH_COMPARE
| HASH_CONTEXT
);
313 * Find or create a hashtable entry for the tuple group containing the
314 * given tuple. The tuple must be the same type as the hashtable entries.
316 * If isnew is NULL, we do not create new entries; we return NULL if no
319 * If isnew isn't NULL, then a new entry is created if no existing entry
320 * matches. On return, *isnew is true if the entry is newly created,
321 * false if it existed already. Any extra space in a new entry has been
325 LookupTupleHashEntry(TupleHashTable hashtable
, TupleTableSlot
*slot
,
328 TupleHashEntry entry
;
329 MemoryContext oldContext
;
330 TupleHashTable saveCurHT
;
331 TupleHashEntryData dummy
;
334 /* If first time through, clone the input slot to make table slot */
335 if (hashtable
->tableslot
== NULL
)
339 oldContext
= MemoryContextSwitchTo(hashtable
->tablecxt
);
342 * We copy the input tuple descriptor just for safety --- we assume
343 * all input tuples will have equivalent descriptors.
345 tupdesc
= CreateTupleDescCopy(slot
->tts_tupleDescriptor
);
346 hashtable
->tableslot
= MakeSingleTupleTableSlot(tupdesc
);
347 MemoryContextSwitchTo(oldContext
);
350 /* Need to run the hash functions in short-lived context */
351 oldContext
= MemoryContextSwitchTo(hashtable
->tempcxt
);
354 * Set up data needed by hash and match functions
356 * We save and restore CurTupleHashTable just in case someone manages to
357 * invoke this code re-entrantly.
359 hashtable
->inputslot
= slot
;
360 hashtable
->in_hash_funcs
= hashtable
->tab_hash_funcs
;
361 hashtable
->cur_eq_funcs
= hashtable
->tab_eq_funcs
;
363 saveCurHT
= CurTupleHashTable
;
364 CurTupleHashTable
= hashtable
;
366 /* Search the hash table */
367 dummy
.firstTuple
= NULL
; /* flag to reference inputslot */
368 entry
= (TupleHashEntry
) hash_search(hashtable
->hashtab
,
370 isnew
? HASH_ENTER
: HASH_FIND
,
377 /* found pre-existing entry */
385 * Zero any caller-requested space in the entry. (This zaps the
386 * "key data" dynahash.c copied into the new entry, but we don't
387 * care since we're about to overwrite it anyway.)
389 MemSet(entry
, 0, hashtable
->entrysize
);
391 /* Copy the first tuple into the table context */
392 MemoryContextSwitchTo(hashtable
->tablecxt
);
393 entry
->firstTuple
= ExecCopySlotMinimalTuple(slot
);
399 CurTupleHashTable
= saveCurHT
;
401 MemoryContextSwitchTo(oldContext
);
407 * Search for a hashtable entry matching the given tuple. No entry is
408 * created if there's not a match. This is similar to the non-creating
409 * case of LookupTupleHashEntry, except that it supports cross-type
410 * comparisons, in which the given tuple is not of the same type as the
411 * table entries. The caller must provide the hash functions to use for
412 * the input tuple, as well as the equality functions, since these may be
413 * different from the table's internal functions.
416 FindTupleHashEntry(TupleHashTable hashtable
, TupleTableSlot
*slot
,
417 FmgrInfo
*eqfunctions
,
418 FmgrInfo
*hashfunctions
)
420 TupleHashEntry entry
;
421 MemoryContext oldContext
;
422 TupleHashTable saveCurHT
;
423 TupleHashEntryData dummy
;
425 /* Need to run the hash functions in short-lived context */
426 oldContext
= MemoryContextSwitchTo(hashtable
->tempcxt
);
429 * Set up data needed by hash and match functions
431 * We save and restore CurTupleHashTable just in case someone manages to
432 * invoke this code re-entrantly.
434 hashtable
->inputslot
= slot
;
435 hashtable
->in_hash_funcs
= hashfunctions
;
436 hashtable
->cur_eq_funcs
= eqfunctions
;
438 saveCurHT
= CurTupleHashTable
;
439 CurTupleHashTable
= hashtable
;
441 /* Search the hash table */
442 dummy
.firstTuple
= NULL
; /* flag to reference inputslot */
443 entry
= (TupleHashEntry
) hash_search(hashtable
->hashtab
,
448 CurTupleHashTable
= saveCurHT
;
450 MemoryContextSwitchTo(oldContext
);
456 * Compute the hash value for a tuple
458 * The passed-in key is a pointer to TupleHashEntryData. In an actual hash
459 * table entry, the firstTuple field points to a tuple (in MinimalTuple
460 * format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a
461 * NULL firstTuple field --- that cues us to look at the inputslot instead.
462 * This convention avoids the need to materialize virtual input tuples unless
463 * they actually need to get copied into the table.
465 * CurTupleHashTable must be set before calling this, since dynahash.c
466 * doesn't provide any API that would let us get at the hashtable otherwise.
468 * Also, the caller must select an appropriate memory context for running
469 * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
472 TupleHashTableHash(const void *key
, Size keysize
)
474 MinimalTuple tuple
= ((const TupleHashEntryData
*) key
)->firstTuple
;
475 TupleTableSlot
*slot
;
476 TupleHashTable hashtable
= CurTupleHashTable
;
477 int numCols
= hashtable
->numCols
;
478 AttrNumber
*keyColIdx
= hashtable
->keyColIdx
;
479 FmgrInfo
*hashfunctions
;
485 /* Process the current input tuple for the table */
486 slot
= hashtable
->inputslot
;
487 hashfunctions
= hashtable
->in_hash_funcs
;
491 /* Process a tuple already stored in the table */
492 /* (this case never actually occurs in current dynahash.c code) */
493 slot
= hashtable
->tableslot
;
494 ExecStoreMinimalTuple(tuple
, slot
, false);
495 hashfunctions
= hashtable
->tab_hash_funcs
;
498 for (i
= 0; i
< numCols
; i
++)
500 AttrNumber att
= keyColIdx
[i
];
504 /* rotate hashkey left 1 bit at each step */
505 hashkey
= (hashkey
<< 1) | ((hashkey
& 0x80000000) ? 1 : 0);
507 attr
= slot_getattr(slot
, att
, &isNull
);
509 if (!isNull
) /* treat nulls as having hash key 0 */
513 hkey
= DatumGetUInt32(FunctionCall1(&hashfunctions
[i
],
523 * See whether two tuples (presumably of the same hash value) match
525 * As above, the passed pointers are pointers to TupleHashEntryData.
527 * CurTupleHashTable must be set before calling this, since dynahash.c
528 * doesn't provide any API that would let us get at the hashtable otherwise.
530 * Also, the caller must select an appropriate memory context for running
531 * the compare functions. (dynahash.c doesn't change CurrentMemoryContext.)
534 TupleHashTableMatch(const void *key1
, const void *key2
, Size keysize
)
536 MinimalTuple tuple1
= ((const TupleHashEntryData
*) key1
)->firstTuple
;
538 #ifdef USE_ASSERT_CHECKING
539 MinimalTuple tuple2
= ((const TupleHashEntryData
*) key2
)->firstTuple
;
541 TupleTableSlot
*slot1
;
542 TupleTableSlot
*slot2
;
543 TupleHashTable hashtable
= CurTupleHashTable
;
546 * We assume that dynahash.c will only ever call us with the first
547 * argument being an actual table entry, and the second argument being
548 * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
549 * could be supported too, but is not currently used by dynahash.c.
551 Assert(tuple1
!= NULL
);
552 slot1
= hashtable
->tableslot
;
553 ExecStoreMinimalTuple(tuple1
, slot1
, false);
554 Assert(tuple2
== NULL
);
555 slot2
= hashtable
->inputslot
;
557 /* For crosstype comparisons, the inputslot must be first */
558 if (execTuplesMatch(slot2
,
561 hashtable
->keyColIdx
,
562 hashtable
->cur_eq_funcs
,