/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *    Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
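 *
 * For example, with one grouping column x and flag values 0 (left) and
 * 1 (right), a sorted input of (x=1,0), (x=1,0), (x=1,1), (x=2,0) forms
 * two groups: the first has numLeft = 2 and numRight = 1, the second has
 * numLeft = 1 and numRight = 0.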
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come
 * before all the tuples of the other.  The planner guarantees that the
 * first input relation is the left-hand one for EXCEPT, and tries to make
 * the smaller input relation come first for INTERSECT.  We build a hash
 * table in memory with one entry for each group of identical tuples, and
 * count the number of tuples in the group from each relation.  After
 * seeing all the input, we scan the hashtable and generate the correct
 * output using those counts.  We can avoid making hashtable entries for
 * any tuples appearing only in the second input relation, since they
 * cannot result in any output.
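 * (Every group that produces output must contain at least one tuple from
 * the first input relation: INTERSECT requires both counts to be nonzero,
 * and for EXCEPT the first input is the left-hand one, whose count must
 * be nonzero.)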
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking nor projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "utils/memutils.h"

/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
    long        numLeft;        /* number of left-input dups in group */
    long        numRight;       /* number of right-input dups in group */
} SetOpStatePerGroupData;

/*
 * To implement hashed mode, we need a hashtable that stores a
 * representative tuple and the duplicate counts for each distinct set
 * of grouping columns.  We compute the hash key from the grouping columns.
 */
typedef struct SetOpHashEntryData *SetOpHashEntry;

typedef struct SetOpHashEntryData
{
    TupleHashEntryData shared;  /* common header for hash table entries */
    SetOpStatePerGroupData pergroup;
} SetOpHashEntryData;
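
/*
 * Note: the casts in this file between SetOpHashEntry and the generic
 * TupleHashEntry returned by LookupTupleHashEntry depend on the common
 * header being the first field of SetOpHashEntryData.
 */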

static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);

/*
 * Initialize state for a new group of input values.
 */
static inline void
initialize_counts(SetOpStatePerGroup pergroup)
{
    pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static inline void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
    if (flag)
        pergroup->numRight++;
    else
        pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;
    int         flag;
    bool        isNull;

    flag = DatumGetInt32(slot_getattr(inputslot,
                                      node->flagColIdx,
                                      &isNull));
    Assert(!isNull);
    Assert(flag == 0 || flag == 1);
    return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;

    Assert(node->strategy == SETOP_HASHED);
    Assert(node->numGroups > 0);

    setopstate->hashtable = BuildTupleHashTable(node->numCols,
                                                node->dupColIdx,
                                                setopstate->eqfunctions,
                                                setopstate->hashfunctions,
                                                node->numGroups,
                                                sizeof(SetOpHashEntryData),
                                                setopstate->tableContext,
                                                setopstate->tempContext);
}
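
/*
 * The hash table and its entries are allocated in tableContext, so that
 * ExecReScanSetOp can discard them wholesale by resetting that context;
 * tempContext holds short-lived data used during hashing and comparison,
 * and is reset after each lookup.
 */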

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
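 *
 * For example, a group with numLeft = 3 and numRight = 2 yields one copy
 * for INTERSECT, min(3, 2) = 2 copies for INTERSECT ALL, no copies for
 * EXCEPT, and 3 - 2 = 1 copy for EXCEPT ALL.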
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
    SetOp      *plannode = (SetOp *) setopstate->ps.plan;

    switch (plannode->cmd)
    {
        case SETOPCMD_INTERSECT:
            if (pergroup->numLeft > 0 && pergroup->numRight > 0)
                setopstate->numOutput = 1;
            else
                setopstate->numOutput = 0;
            break;
        case SETOPCMD_INTERSECT_ALL:
            setopstate->numOutput =
                (pergroup->numLeft < pergroup->numRight) ?
                pergroup->numLeft : pergroup->numRight;
            break;
        case SETOPCMD_EXCEPT:
            if (pergroup->numLeft > 0 && pergroup->numRight == 0)
                setopstate->numOutput = 1;
            else
                setopstate->numOutput = 0;
            break;
        case SETOPCMD_EXCEPT_ALL:
            setopstate->numOutput =
                (pergroup->numLeft < pergroup->numRight) ?
                0 : (pergroup->numLeft - pergroup->numRight);
            break;
        default:
            elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
            break;
    }
}

/* ----------------------------------------------------------------
 *      ExecSetOp
 * ----------------------------------------------------------------
 */
TupleTableSlot *                /* return: a tuple or NULL */
ExecSetOp(SetOpState *node)
{
    SetOp      *plannode = (SetOp *) node->ps.plan;
    TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

    /*
     * If the previously-returned tuple needs to be returned more than once,
     * keep returning it.
     */
    if (node->numOutput > 0)
    {
        node->numOutput--;
        return resultTupleSlot;
    }

    /* Otherwise, we're done if we are out of groups */
    if (node->setop_done)
        return NULL;

    /* Fetch the next tuple group according to the correct strategy */
    if (plannode->strategy == SETOP_HASHED)
    {
        if (!node->table_filled)
            setop_fill_hash_table(node);
        return setop_retrieve_hash_table(node);
    }
    else
        return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;
    PlanState  *outerPlan;
    SetOpStatePerGroup pergroup;
    TupleTableSlot *outerslot;
    TupleTableSlot *resultTupleSlot;

    /*
     * get state info from node
     */
    outerPlan = outerPlanState(setopstate);
    pergroup = setopstate->pergroup;
    resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

    /*
     * We loop retrieving groups until we find one we should return
     */
    while (!setopstate->setop_done)
    {
        /*
         * If we don't already have the first tuple of the new group, fetch
         * it from the outer plan.
         */
        if (setopstate->grp_firstTuple == NULL)
        {
            outerslot = ExecProcNode(outerPlan);
            if (!TupIsNull(outerslot))
            {
                /* Make a copy of the first input tuple */
                setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
            }
            else
            {
                /* outer plan produced no tuples at all */
                setopstate->setop_done = true;
                return NULL;
            }
        }

        /*
         * Store the copied first input tuple in the tuple table slot
         * reserved for it.  The tuple will be deleted when it is cleared
         * from the slot.
         */
        ExecStoreTuple(setopstate->grp_firstTuple,
                       resultTupleSlot,
                       InvalidBuffer,
                       true);
        setopstate->grp_firstTuple = NULL;  /* don't keep two pointers */

        /* Initialize working state for a new input tuple group */
        initialize_counts(pergroup);

        /* Count the first input tuple */
        advance_counts(pergroup,
                       fetch_tuple_flag(setopstate, resultTupleSlot));

        /*
         * Scan the outer plan until we exhaust it or cross a group boundary.
         */
        for (;;)
        {
            outerslot = ExecProcNode(outerPlan);
            if (TupIsNull(outerslot))
            {
                /* no more outer-plan tuples available */
                setopstate->setop_done = true;
                break;
            }

            /*
             * Check whether we've crossed a group boundary.
             */
            if (!execTuplesMatch(resultTupleSlot,
                                 outerslot,
                                 node->numCols, node->dupColIdx,
                                 setopstate->eqfunctions,
                                 setopstate->tempContext))
            {
                /*
                 * Save the first input tuple of the next group.
                 */
                setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
                break;
            }

            /* Still in same group, so count this tuple */
            advance_counts(pergroup,
                           fetch_tuple_flag(setopstate, outerslot));
        }

        /*
         * Done scanning input tuple group.  See if we should emit any
         * copies of result tuple, and if so return the first copy.
         */
        set_output_count(setopstate, pergroup);

        if (setopstate->numOutput > 0)
        {
            setopstate->numOutput--;
            return resultTupleSlot;
        }
    }

    /* No more groups */
    ExecClearTuple(resultTupleSlot);
    return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;
    PlanState  *outerPlan;
    int         firstFlag;
    bool        in_first_rel;

    /*
     * get state info from node
     */
    outerPlan = outerPlanState(setopstate);
    firstFlag = node->firstFlag;
    /* verify planner didn't mess up */
    Assert(firstFlag == 0 ||
           (firstFlag == 1 &&
            (node->cmd == SETOPCMD_INTERSECT ||
             node->cmd == SETOPCMD_INTERSECT_ALL)));
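
    /*
     * A firstFlag of 1 means the planner swapped the inputs to put the
     * smaller relation first, which per the comments at the head of this
     * file it does only for INTERSECT; for EXCEPT the left input must come
     * first.
     */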

    /*
     * Process each outer-plan tuple, and then fetch the next one, until we
     * exhaust the outer plan.
     */
    in_first_rel = true;
    for (;;)
    {
        TupleTableSlot *outerslot;
        int         flag;
        SetOpHashEntry entry;
        bool        isnew;

        outerslot = ExecProcNode(outerPlan);
        if (TupIsNull(outerslot))
            break;

        /* Identify whether it's left or right input */
        flag = fetch_tuple_flag(setopstate, outerslot);

        if (flag == firstFlag)
        {
            /* (still) in first input relation */
            Assert(in_first_rel);

            /* Find or build hashtable entry for this tuple's group */
            entry = (SetOpHashEntry)
                LookupTupleHashEntry(setopstate->hashtable, outerslot, &isnew);

            /* If new tuple group, initialize counts */
            if (isnew)
                initialize_counts(&entry->pergroup);

            /* Advance the counts */
            advance_counts(&entry->pergroup, flag);
        }
        else
        {
            /* reached second relation */
            in_first_rel = false;

            /* For tuples not seen previously, do not make hashtable entry */
            entry = (SetOpHashEntry)
                LookupTupleHashEntry(setopstate->hashtable, outerslot, NULL);

            /* Advance the counts if entry is already present */
            if (entry)
                advance_counts(&entry->pergroup, flag);
        }

        /* Must reset temp context after each hashtable lookup */
        MemoryContextReset(setopstate->tempContext);
    }

    setopstate->table_filled = true;
    /* Initialize to walk the hash table */
    ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
    SetOpHashEntry entry;
    TupleTableSlot *resultTupleSlot;

    /*
     * get state info from node
     */
    resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

    /*
     * We loop retrieving groups until we find one we should return
     */
    while (!setopstate->setop_done)
    {
        /*
         * Find the next entry in the hash table
         */
        entry = (SetOpHashEntry) ScanTupleHashTable(&setopstate->hashiter);
        if (entry == NULL)
        {
            /* No more entries in hashtable, so done */
            setopstate->setop_done = true;
            return NULL;
        }

        /*
         * See if we should emit any copies of this tuple, and if so return
         * the first copy.
         */
        set_output_count(setopstate, &entry->pergroup);

        if (setopstate->numOutput > 0)
        {
            setopstate->numOutput--;
            return ExecStoreMinimalTuple(entry->shared.firstTuple,
                                         resultTupleSlot,
                                         false);
        }
    }

    /* No more groups */
    ExecClearTuple(resultTupleSlot);
    return NULL;
}

/* ----------------------------------------------------------------
 *      ExecInitSetOp
 *
 *      This initializes the setop node state structures and
 *      the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
    SetOpState *setopstate;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * create state structure
     */
    setopstate = makeNode(SetOpState);
    setopstate->ps.plan = (Plan *) node;
    setopstate->ps.state = estate;

    setopstate->eqfunctions = NULL;
    setopstate->hashfunctions = NULL;
    setopstate->setop_done = false;
    setopstate->numOutput = 0;
    setopstate->pergroup = NULL;
    setopstate->grp_firstTuple = NULL;
    setopstate->hashtable = NULL;
    setopstate->tableContext = NULL;

    /*
     * Miscellaneous initialization
     *
     * SetOp nodes have no ExprContext initialization because they never call
     * ExecQual or ExecProject.  But they do need a per-tuple memory context
     * anyway for calling execTuplesMatch.
     */
    setopstate->tempContext =
        AllocSetContextCreate(CurrentMemoryContext,
                              "SetOp",
                              ALLOCSET_DEFAULT_MINSIZE,
                              ALLOCSET_DEFAULT_INITSIZE,
                              ALLOCSET_DEFAULT_MAXSIZE);

    /*
     * If hashing, we also need a longer-lived context to store the hash
     * table.  The table can't just be kept in the per-query context because
     * we want to be able to throw it away in ExecReScanSetOp.
     */
    if (node->strategy == SETOP_HASHED)
        setopstate->tableContext =
            AllocSetContextCreate(CurrentMemoryContext,
                                  "SetOp hash table",
                                  ALLOCSET_DEFAULT_MINSIZE,
                                  ALLOCSET_DEFAULT_INITSIZE,
                                  ALLOCSET_DEFAULT_MAXSIZE);

#define SETOP_NSLOTS 1

    /*
     * Tuple table initialization
     */
    ExecInitResultTupleSlot(estate, &setopstate->ps);

    /*
     * initialize child nodes
     *
     * If we are hashing then the child plan does not need to handle REWIND
     * efficiently; see ExecReScanSetOp.
     */
    if (node->strategy == SETOP_HASHED)
        eflags &= ~EXEC_FLAG_REWIND;
    outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);

    /*
     * setop nodes do no projections, so initialize projection info for this
     * node appropriately
     */
    ExecAssignResultTypeFromTL(&setopstate->ps);
    setopstate->ps.ps_ProjInfo = NULL;

    /*
     * Precompute fmgr lookup data for inner loop.  We need both equality and
     * hashing functions to do it by hashing, but only equality if not
     * hashing.
     */
    if (node->strategy == SETOP_HASHED)
        execTuplesHashPrepare(node->numCols,
                              node->dupOperators,
                              &setopstate->eqfunctions,
                              &setopstate->hashfunctions);
    else
        setopstate->eqfunctions =
            execTuplesMatchPrepare(node->numCols,
                                   node->dupOperators);

    if (node->strategy == SETOP_HASHED)
    {
        build_hash_table(setopstate);
        setopstate->table_filled = false;
    }
    else
    {
        setopstate->pergroup =
            (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
    }

    return setopstate;
}

int
ExecCountSlotsSetOp(SetOp *node)
{
    return ExecCountSlotsNode(outerPlan(node)) +
        ExecCountSlotsNode(innerPlan(node)) +
        SETOP_NSLOTS;
}

/* ----------------------------------------------------------------
 *      ExecEndSetOp
 *
 *      This shuts down the subplan and frees resources allocated
 *      to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
    /* clean up tuple table */
    ExecClearTuple(node->ps.ps_ResultTupleSlot);

    /* free subsidiary stuff including hashtable */
    MemoryContextDelete(node->tempContext);
    if (node->tableContext)
        MemoryContextDelete(node->tableContext);

    ExecEndNode(outerPlanState(node));
}

void
ExecReScanSetOp(SetOpState *node, ExprContext *exprCtxt)
{
    ExecClearTuple(node->ps.ps_ResultTupleSlot);
    node->setop_done = false;
    node->numOutput = 0;

    if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
    {
        /*
         * In the hashed case, if we haven't yet built the hash table then
         * we can just return; nothing done yet, so nothing to undo.  If
         * subnode's chgParam is not NULL then it will be re-scanned by
         * ExecProcNode, else no reason to re-scan it at all.
         */
        if (!node->table_filled)
            return;

        /*
         * If we do have the hash table and the subplan does not have any
         * parameter changes, then we can just rescan the existing hash
         * table; no need to build it again.
         */
        if (((PlanState *) node)->lefttree->chgParam == NULL)
        {
            ResetTupleHashIterator(node->hashtable, &node->hashiter);
            return;
        }
    }

    /* Release first tuple of group, if we have made a copy */
    if (node->grp_firstTuple != NULL)
    {
        heap_freetuple(node->grp_firstTuple);
        node->grp_firstTuple = NULL;
    }

    /* Release any hashtable storage */
    if (node->tableContext)
        MemoryContextResetAndDeleteChildren(node->tableContext);

    /* And rebuild empty hashtable if needed */
    if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
    {
        build_hash_table(node);
        node->table_filled = false;
    }

    /*
     * if chgParam of subnode is not null then plan will be re-scanned by
     * first ExecProcNode.
     */
    if (((PlanState *) node)->lefttree->chgParam == NULL)
        ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
}