/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *	  Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
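 *
 * For example (an illustration, not tied to any particular query): with one
 * grouping column, sorted input might look like (1,flag=0) (1,flag=0)
 * (1,flag=1) (2,flag=1); the first group then has two left-input copies and
 * one right-input copy, and the second group has none and one respectively.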
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
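 *
 * As an abbreviated illustration (exact plan shape may vary), a query such as
 *		SELECT a FROM t1 INTERSECT SELECT a FROM t2
 * typically yields
 *		HashSetOp Intersect
 *		  ->  Append
 *				->  Subquery Scan on "*SELECT* 1"
 *				->  Subquery Scan on "*SELECT* 2"
 * where each Append child emits the flag column described above.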
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking nor projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeSetOp.c
 *
 *-------------------------------------------------------------------------
 */
47 #include "access/htup_details.h"
48 #include "executor/executor.h"
49 #include "executor/nodeSetOp.h"
50 #include "miscadmin.h"
51 #include "utils/memutils.h"


/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */
} SetOpStatePerGroupData;

static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);

/*
 * Initialize state for a new group of input values.
 */
static inline void
initialize_counts(SetOpStatePerGroup pergroup)
{
	pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static inline void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
	/* flag is 0 for a left-input tuple, 1 for a right-input tuple */
	if (flag)
		pergroup->numRight++;
	else
		pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	int			flag;
	bool		isNull;

	flag = DatumGetInt32(slot_getattr(inputslot,
									  node->flagColIdx,
									  &isNull));
	Assert(!isNull);
	Assert(flag == 0 || flag == 1);
	return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	ExprContext *econtext = setopstate->ps.ps_ExprContext;
	TupleDesc	desc = ExecGetResultType(outerPlanState(setopstate));

	Assert(node->strategy == SETOP_HASHED);
	Assert(node->numGroups > 0);

	setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
												   desc,
												   node->numCols,
												   node->dupColIdx,
												   setopstate->eqfuncoids,
												   setopstate->hashfunctions,
												   node->dupCollations,
												   node->numGroups,
												   0,
												   setopstate->ps.state->es_query_cxt,
												   setopstate->tableContext,
												   econtext->ecxt_per_tuple_memory,
												   false);
}

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
 */
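/*
 * Worked example of those rules: for a group with numLeft = 3 and
 * numRight = 1, INTERSECT emits one row, INTERSECT ALL emits min(3,1) = 1
 * row, EXCEPT emits no rows, and EXCEPT ALL emits 3 - 1 = 2 rows.
 */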
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
	SetOp	   *plannode = (SetOp *) setopstate->ps.plan;

	switch (plannode->cmd)
	{
		case SETOPCMD_INTERSECT:
			if (pergroup->numLeft > 0 && pergroup->numRight > 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_INTERSECT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				pergroup->numLeft : pergroup->numRight;
			break;
		case SETOPCMD_EXCEPT:
			if (pergroup->numLeft > 0 && pergroup->numRight == 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_EXCEPT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				0 : (pergroup->numLeft - pergroup->numRight);
			break;
		default:
			elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
			break;
	}
}


/* ----------------------------------------------------------------
 *		ExecSetOp
 * ----------------------------------------------------------------
 */
static TupleTableSlot *			/* return: a tuple or NULL */
ExecSetOp(PlanState *pstate)
{
	SetOpState *node = castNode(SetOpState, pstate);
	SetOp	   *plannode = (SetOp *) node->ps.plan;
	TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

	CHECK_FOR_INTERRUPTS();

	/*
	 * If the previously-returned tuple needs to be returned more than once,
	 * keep returning it.
	 */
	if (node->numOutput > 0)
	{
		node->numOutput--;
		return resultTupleSlot;
	}

	/* Otherwise, we're done if we are out of groups */
	if (node->setop_done)
		return NULL;

	/* Fetch the next tuple group according to the correct strategy */
	if (plannode->strategy == SETOP_HASHED)
	{
		if (!node->table_filled)
			setop_fill_hash_table(node);
		return setop_retrieve_hash_table(node);
	}
	else
		return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
	PlanState  *outerPlan;
	SetOpStatePerGroup pergroup;
	TupleTableSlot *outerslot;
	TupleTableSlot *resultTupleSlot;
	ExprContext *econtext = setopstate->ps.ps_ExprContext;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	pergroup = (SetOpStatePerGroup) setopstate->pergroup;
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * If we don't already have the first tuple of the new group, fetch it
		 * from the outer plan.
		 */
		if (setopstate->grp_firstTuple == NULL)
		{
			outerslot = ExecProcNode(outerPlan);
			if (!TupIsNull(outerslot))
			{
				/* Make a copy of the first input tuple */
				setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
			}
			else
			{
				/* outer plan produced no tuples at all */
				setopstate->setop_done = true;
				return NULL;
			}
		}

		/*
		 * Store the copied first input tuple in the tuple table slot reserved
		 * for it.  The tuple will be deleted when it is cleared from the
		 * slot.
		 */
		ExecStoreHeapTuple(setopstate->grp_firstTuple,
						   resultTupleSlot,
						   true);
		setopstate->grp_firstTuple = NULL;	/* don't keep two pointers */

		/* Initialize working state for a new input tuple group */
		initialize_counts(pergroup);

		/* Count the first input tuple */
		advance_counts(pergroup,
					   fetch_tuple_flag(setopstate, resultTupleSlot));

		/*
		 * Scan the outer plan until we exhaust it or cross a group boundary.
		 */
		for (;;)
		{
			outerslot = ExecProcNode(outerPlan);
			if (TupIsNull(outerslot))
			{
				/* no more outer-plan tuples available */
				setopstate->setop_done = true;
				break;
			}

			/*
			 * Check whether we've crossed a group boundary.
			 */
			econtext->ecxt_outertuple = resultTupleSlot;
			econtext->ecxt_innertuple = outerslot;
			if (!ExecQualAndReset(setopstate->eqfunction, econtext))
			{
				/*
				 * Save the first input tuple of the next group.
				 */
				setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
				break;
			}

			/* Still in same group, so count this tuple */
			advance_counts(pergroup,
						   fetch_tuple_flag(setopstate, outerslot));
		}

		/*
		 * Done scanning input tuple group.  See if we should emit any copies
		 * of result tuple, and if so return the first copy.
		 */
		set_output_count(setopstate, pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return resultTupleSlot;
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	int			firstFlag;
	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;
	ExprContext *econtext = setopstate->ps.ps_ExprContext;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	firstFlag = node->firstFlag;
	/* verify planner didn't mess up */
	Assert(firstFlag == 0 ||
		   (firstFlag == 1 &&
			(node->cmd == SETOPCMD_INTERSECT ||
			 node->cmd == SETOPCMD_INTERSECT_ALL)));
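
	/*
	 * That is, the first-scanned relation must be the left-hand input unless
	 * this is an INTERSECT variant, the only case in which the planner may
	 * swap the inputs without changing the result.
	 */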

	/*
	 * Process each outer-plan tuple, and then fetch the next one, until we
	 * exhaust the outer plan.
	 */
	in_first_rel = true;
	for (;;)
	{
		TupleTableSlot *outerslot;
		int			flag;
		TupleHashEntryData *entry;
		bool		isnew;

		outerslot = ExecProcNode(outerPlan);
		if (TupIsNull(outerslot))
			break;

		/* Identify whether it's left or right input */
		flag = fetch_tuple_flag(setopstate, outerslot);

		if (flag == firstFlag)
		{
			/* (still) in first input relation */
			Assert(in_first_rel);

			/* Find or build hashtable entry for this tuple's group */
			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
										 &isnew, NULL);

			/* If new tuple group, initialize counts */
			if (isnew)
			{
				entry->additional = (SetOpStatePerGroup)
					MemoryContextAlloc(setopstate->hashtable->tablecxt,
									   sizeof(SetOpStatePerGroupData));
				initialize_counts((SetOpStatePerGroup) entry->additional);
			}

			/* Advance the counts */
			advance_counts((SetOpStatePerGroup) entry->additional, flag);
		}
		else
		{
			/* reached second relation */
			in_first_rel = false;

			/* For tuples not seen previously, do not make hashtable entry */
			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
										 NULL, NULL);

			/* Advance the counts if entry is already present */
			if (entry)
				advance_counts((SetOpStatePerGroup) entry->additional, flag);
		}

		/* Must reset expression context after each hashtable lookup */
		ResetExprContext(econtext);
	}

	setopstate->table_filled = true;
	/* Initialize to walk the hash table */
	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
	TupleHashEntryData *entry;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		CHECK_FOR_INTERRUPTS();

		/*
		 * Find the next entry in the hash table
		 */
		entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
		if (entry == NULL)
		{
			/* No more entries in hashtable, so done */
			setopstate->setop_done = true;
			return NULL;
		}

		/*
		 * See if we should emit any copies of this tuple, and if so return
		 * the first copy.
		 */
		set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return ExecStoreMinimalTuple(entry->firstTuple,
										 resultTupleSlot,
										 false);
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
	SetOpState *setopstate;
	TupleDesc	outerDesc;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	setopstate = makeNode(SetOpState);
	setopstate->ps.plan = (Plan *) node;
	setopstate->ps.state = estate;
	setopstate->ps.ExecProcNode = ExecSetOp;

	setopstate->eqfuncoids = NULL;
	setopstate->hashfunctions = NULL;
	setopstate->setop_done = false;
	setopstate->numOutput = 0;
	setopstate->pergroup = NULL;
	setopstate->grp_firstTuple = NULL;
	setopstate->hashtable = NULL;
	setopstate->tableContext = NULL;

	/*
	 * create expression context
	 */
	ExecAssignExprContext(estate, &setopstate->ps);

	/*
	 * If hashing, we also need a longer-lived context to store the hash
	 * table.  The table can't just be kept in the per-query context because
	 * we want to be able to throw it away in ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		setopstate->tableContext =
			AllocSetContextCreate(CurrentMemoryContext,
								  "SetOp hash table",
								  ALLOCSET_DEFAULT_SIZES);

	/*
	 * initialize child nodes
	 *
	 * If we are hashing then the child plan does not need to handle REWIND
	 * efficiently; see ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		eflags &= ~EXEC_FLAG_REWIND;
	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
	outerDesc = ExecGetResultType(outerPlanState(setopstate));

	/*
	 * Initialize result slot and type.  Setop nodes do no projections, so
	 * initialize projection info for this node appropriately.
	 */
	ExecInitResultTupleSlotTL(&setopstate->ps,
							  node->strategy == SETOP_HASHED ?
							  &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
	setopstate->ps.ps_ProjInfo = NULL;

	/*
	 * Precompute fmgr lookup data for inner loop.  We need both equality and
	 * hashing functions to do it by hashing, but only equality if not
	 * hashing.
	 */
	if (node->strategy == SETOP_HASHED)
		execTuplesHashPrepare(node->numCols,
							  node->dupOperators,
							  &setopstate->eqfuncoids,
							  &setopstate->hashfunctions);
	else
		setopstate->eqfunction =
			execTuplesMatchPrepare(outerDesc,
								   node->numCols,
								   node->dupColIdx,
								   node->dupOperators,
								   node->dupCollations,
								   &setopstate->ps);

	if (node->strategy == SETOP_HASHED)
	{
		build_hash_table(setopstate);
		setopstate->table_filled = false;
	}
	else
	{
		setopstate->pergroup =
			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
	}

	return setopstate;
}

/* ----------------------------------------------------------------
 *		ExecEndSetOp
 *
 *		This shuts down the subplan and frees resources allocated
 *		to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
	/* free subsidiary stuff including hashtable */
	if (node->tableContext)
		MemoryContextDelete(node->tableContext);

	ExecEndNode(outerPlanState(node));
}


void
ExecReScanSetOp(SetOpState *node)
{
	PlanState  *outerPlan = outerPlanState(node);

	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo.  If
		 * subnode's chgParam is not NULL then it will be re-scanned by
		 * ExecProcNode, else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash
		 * table; no need to build it again.
		 */
		if (outerPlan->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage */
	if (node->tableContext)
		MemoryContextReset(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		ResetTupleHashTable(node->hashtable);
		node->table_filled = false;
	}

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (outerPlan->chgParam == NULL)
		ExecReScan(outerPlan);
}