/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *	  Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking nor projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *-------------------------------------------------------------------------
 */
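
/*
 * A concrete illustration of those output rules, using two hypothetical
 * tables "a" and "b" in which some value x appears 3 times in a and 2
 * times in b:
 *
 *		SELECT x FROM a INTERSECT     SELECT x FROM b;	-- x emitted once
 *		SELECT x FROM a INTERSECT ALL SELECT x FROM b;	-- x emitted twice
 *		SELECT x FROM a EXCEPT        SELECT x FROM b;	-- x not emitted
 *		SELECT x FROM a EXCEPT ALL    SELECT x FROM b;	-- x emitted once
 *
 * set_output_count() below derives these counts from the per-group
 * numLeft/numRight tallies.
 */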
#include "postgres.h"

#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "utils/memutils.h"

/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */
} SetOpStatePerGroupData;
/*
 * To implement hashed mode, we need a hashtable that stores a
 * representative tuple and the duplicate counts for each distinct set
 * of grouping columns.  We compute the hash key from the grouping columns.
 */
typedef struct SetOpHashEntryData *SetOpHashEntry;

typedef struct SetOpHashEntryData
{
	TupleHashEntryData shared;	/* common header for hash table entries */
	SetOpStatePerGroupData pergroup;
} SetOpHashEntryData;

static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);

/*
 * Initialize state for a new group of input values.
 */
static void
initialize_counts(SetOpStatePerGroup pergroup)
{
	pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
	if (flag)
		pergroup->numRight++;
	else
		pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	int			flag;
	bool		isNull;

	flag = DatumGetInt32(slot_getattr(inputslot,
									  node->flagColIdx,
									  &isNull));
	Assert(!isNull);
	Assert(flag == 0 || flag == 1);
	return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;

	Assert(node->strategy == SETOP_HASHED);
	Assert(node->numGroups > 0);

	setopstate->hashtable = BuildTupleHashTable(node->numCols,
												node->dupColIdx,
												setopstate->eqfunctions,
												setopstate->hashfunctions,
												node->numGroups,
												sizeof(SetOpHashEntryData),
												setopstate->tableContext,
												setopstate->tempContext);
}

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
	SetOp	   *plannode = (SetOp *) setopstate->ps.plan;

	switch (plannode->cmd)
	{
		case SETOPCMD_INTERSECT:
			if (pergroup->numLeft > 0 && pergroup->numRight > 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_INTERSECT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				pergroup->numLeft : pergroup->numRight;
			break;
		case SETOPCMD_EXCEPT:
			if (pergroup->numLeft > 0 && pergroup->numRight == 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_EXCEPT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				0 : (pergroup->numLeft - pergroup->numRight);
			break;
		default:
			elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
			break;
	}
}
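
/*
 * For example (hypothetical counts): a group with numLeft = 2 and
 * numRight = 5 yields numOutput = 0 for EXCEPT ALL (the comparison above
 * guards against a negative count) and numOutput = 2 for INTERSECT ALL.
 */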

/* ----------------------------------------------------------------
 *		ExecSetOp
 * ----------------------------------------------------------------
 */
TupleTableSlot *				/* return: a tuple or NULL */
ExecSetOp(SetOpState *node)
{
	SetOp	   *plannode = (SetOp *) node->ps.plan;
	TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

	/*
	 * If the previously-returned tuple needs to be returned more than once,
	 * keep returning it.
	 */
	if (node->numOutput > 0)
	{
		node->numOutput--;
		return resultTupleSlot;
	}

	/* Otherwise, we're done if we are out of groups */
	if (node->setop_done)
		return NULL;

	/* Fetch the next tuple group according to the correct strategy */
	if (plannode->strategy == SETOP_HASHED)
	{
		if (!node->table_filled)
			setop_fill_hash_table(node);
		return setop_retrieve_hash_table(node);
	}
	else
		return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	SetOpStatePerGroup pergroup;
	TupleTableSlot *outerslot;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	pergroup = setopstate->pergroup;
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * If we don't already have the first tuple of the new group, fetch it
		 * from the outer plan.
		 */
		if (setopstate->grp_firstTuple == NULL)
		{
			outerslot = ExecProcNode(outerPlan);
			if (!TupIsNull(outerslot))
			{
				/* Make a copy of the first input tuple */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
			}
			else
			{
				/* outer plan produced no tuples at all */
				setopstate->setop_done = true;
				return NULL;
			}
		}

		/*
		 * Store the copied first input tuple in the tuple table slot
		 * reserved for it.  The tuple will be deleted when it is cleared
		 * from the slot.
		 */
		ExecStoreTuple(setopstate->grp_firstTuple,
					   resultTupleSlot,
					   InvalidBuffer,
					   true);
		setopstate->grp_firstTuple = NULL;	/* don't keep two pointers */

		/* Initialize working state for a new input tuple group */
		initialize_counts(pergroup);

		/* Count the first input tuple */
		advance_counts(pergroup,
					   fetch_tuple_flag(setopstate, resultTupleSlot));

		/*
		 * Scan the outer plan until we exhaust it or cross a group boundary.
		 */
		for (;;)
		{
			outerslot = ExecProcNode(outerPlan);
			if (TupIsNull(outerslot))
			{
				/* no more outer-plan tuples available */
				setopstate->setop_done = true;
				break;
			}

			/*
			 * Check whether we've crossed a group boundary.
			 */
			if (!execTuplesMatch(resultTupleSlot,
								 outerslot,
								 node->numCols, node->dupColIdx,
								 setopstate->eqfunctions,
								 setopstate->tempContext))
			{
				/*
				 * Save the first input tuple of the next group.
				 */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
				break;
			}

			/* Still in same group, so count this tuple */
			advance_counts(pergroup,
						   fetch_tuple_flag(setopstate, outerslot));
		}

		/*
		 * Done scanning input tuple group.  See if we should emit any
		 * copies of result tuple, and if so return the first copy.
		 */
		set_output_count(setopstate, pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return resultTupleSlot;
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	int			firstFlag;
	bool		in_first_rel;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	firstFlag = node->firstFlag;
	/* verify planner didn't mess up */
	Assert(firstFlag == 0 ||
		   (firstFlag == 1 &&
			(node->cmd == SETOPCMD_INTERSECT ||
			 node->cmd == SETOPCMD_INTERSECT_ALL)));

	/*
	 * Process each outer-plan tuple, and then fetch the next one, until we
	 * exhaust the outer plan.
	 */
	in_first_rel = true;
	for (;;)
	{
		TupleTableSlot *outerslot;
		int			flag;
		SetOpHashEntry entry;
		bool		isnew;

		outerslot = ExecProcNode(outerPlan);
		if (TupIsNull(outerslot))
			break;

		/* Identify whether it's left or right input */
		flag = fetch_tuple_flag(setopstate, outerslot);

		if (flag == firstFlag)
		{
			/* (still) in first input relation */
			Assert(in_first_rel);

			/* Find or build hashtable entry for this tuple's group */
			entry = (SetOpHashEntry)
				LookupTupleHashEntry(setopstate->hashtable, outerslot, &isnew);

			/* If new tuple group, initialize counts */
			if (isnew)
				initialize_counts(&entry->pergroup);

			/* Advance the counts */
			advance_counts(&entry->pergroup, flag);
		}
		else
		{
			/* reached second relation */
			in_first_rel = false;

			/* For tuples not seen previously, do not make hashtable entry */
			entry = (SetOpHashEntry)
				LookupTupleHashEntry(setopstate->hashtable, outerslot, NULL);

			/* Advance the counts if entry is already present */
			if (entry)
				advance_counts(&entry->pergroup, flag);
		}

		/* Must reset temp context after each hashtable lookup */
		MemoryContextReset(setopstate->tempContext);
	}

	setopstate->table_filled = true;
	/* Initialize to walk the hash table */
	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}
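
/*
 * Why skipping hash entries for tuples seen only in the second input is
 * safe: such a group would end with a zero count for the first relation.
 * For INTERSECT and INTERSECT ALL, a zero count on either side makes
 * set_output_count() emit nothing; for EXCEPT and EXCEPT ALL, the planner
 * guarantees the first input is the left one, so numLeft == 0 likewise
 * yields numOutput == 0.
 */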

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
	SetOpHashEntry entry;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * Find the next entry in the hash table
		 */
		entry = (SetOpHashEntry) ScanTupleHashTable(&setopstate->hashiter);
		if (entry == NULL)
		{
			/* No more entries in hashtable, so done */
			setopstate->setop_done = true;
			break;
		}

		/*
		 * See if we should emit any copies of this tuple, and if so return
		 * the first copy.
		 */
		set_output_count(setopstate, &entry->pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return ExecStoreMinimalTuple(entry->shared.firstTuple,
										 resultTupleSlot,
										 false);
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
	SetOpState *setopstate;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	setopstate = makeNode(SetOpState);
	setopstate->ps.plan = (Plan *) node;
	setopstate->ps.state = estate;

	setopstate->eqfunctions = NULL;
	setopstate->hashfunctions = NULL;
	setopstate->setop_done = false;
	setopstate->numOutput = 0;
	setopstate->pergroup = NULL;
	setopstate->grp_firstTuple = NULL;
	setopstate->hashtable = NULL;
	setopstate->tableContext = NULL;

	/*
	 * Miscellaneous initialization
	 *
	 * SetOp nodes have no ExprContext initialization because they never call
	 * ExecQual or ExecProject.  But they do need a per-tuple memory context
	 * anyway for calling execTuplesMatch.
	 */
	setopstate->tempContext =
		AllocSetContextCreate(CurrentMemoryContext,
							  "SetOp",
							  ALLOCSET_DEFAULT_MINSIZE,
							  ALLOCSET_DEFAULT_INITSIZE,
							  ALLOCSET_DEFAULT_MAXSIZE);

	/*
	 * If hashing, we also need a longer-lived context to store the hash
	 * table.  The table can't just be kept in the per-query context because
	 * we want to be able to throw it away in ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		setopstate->tableContext =
			AllocSetContextCreate(CurrentMemoryContext,
								  "SetOp hash table",
								  ALLOCSET_DEFAULT_MINSIZE,
								  ALLOCSET_DEFAULT_INITSIZE,
								  ALLOCSET_DEFAULT_MAXSIZE);

#define SETOP_NSLOTS 1

	/*
	 * Tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &setopstate->ps);

	/*
	 * initialize child nodes
	 *
	 * If we are hashing then the child plan does not need
	 * to handle REWIND efficiently; see ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		eflags &= ~EXEC_FLAG_REWIND;
	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * setop nodes do no projections, so initialize projection info for this
	 * node appropriately
	 */
	ExecAssignResultTypeFromTL(&setopstate->ps);
	setopstate->ps.ps_ProjInfo = NULL;

	/*
	 * Precompute fmgr lookup data for inner loop.  We need both equality and
	 * hashing functions to do it by hashing, but only equality if not
	 * hashing.
	 */
	if (node->strategy == SETOP_HASHED)
		execTuplesHashPrepare(node->numCols,
							  node->dupOperators,
							  &setopstate->eqfunctions,
							  &setopstate->hashfunctions);
	else
		setopstate->eqfunctions =
			execTuplesMatchPrepare(node->numCols,
								   node->dupOperators);

	if (node->strategy == SETOP_HASHED)
	{
		build_hash_table(setopstate);
		setopstate->table_filled = false;
	}
	else
	{
		setopstate->pergroup =
			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
	}

	return setopstate;
}

int
ExecCountSlotsSetOp(SetOp *node)
{
	return ExecCountSlotsNode(outerPlan(node)) +
		ExecCountSlotsNode(innerPlan(node)) +
		SETOP_NSLOTS;
}

/* ----------------------------------------------------------------
 *		ExecEndSetOp
 *
 *		This shuts down the subplan and frees resources allocated
 *		to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
	/* clean up tuple table */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);

	/* free subsidiary stuff including hashtable */
	MemoryContextDelete(node->tempContext);
	if (node->tableContext)
		MemoryContextDelete(node->tableContext);

	ExecEndNode(outerPlanState(node));
}

void
ExecReScanSetOp(SetOpState *node, ExprContext *exprCtxt)
{
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo. If subnode's
		 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
		 * else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash table;
		 * no need to build it again.
		 */
		if (((PlanState *) node)->lefttree->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage */
	if (node->tableContext)
		MemoryContextResetAndDeleteChildren(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		build_hash_table(node);
		node->table_filled = false;
	}

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (((PlanState *) node)->lefttree->chgParam == NULL)
		ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
}