/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *	  Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking nor projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *-------------------------------------------------------------------------
 */
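
/*
 * A concrete illustration of those output rules, using two hypothetical
 * tables "a" and "b" in which some value x appears 3 times in a and 2
 * times in b:
 *
 *		SELECT x FROM a INTERSECT     SELECT x FROM b;	-- x emitted once
 *		SELECT x FROM a INTERSECT ALL SELECT x FROM b;	-- x emitted twice
 *		SELECT x FROM a EXCEPT        SELECT x FROM b;	-- x not emitted
 *		SELECT x FROM a EXCEPT ALL    SELECT x FROM b;	-- x emitted once
 *
 * set_output_count() below derives these counts from the per-group
 * numLeft/numRight tallies.
 */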
#include "postgres.h"

#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "utils/memutils.h"

/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */
} SetOpStatePerGroupData;
/*
 * To implement hashed mode, we need a hashtable that stores a
 * representative tuple and the duplicate counts for each distinct set
 * of grouping columns.  We compute the hash key from the grouping columns.
 */
typedef struct SetOpHashEntryData *SetOpHashEntry;

typedef struct SetOpHashEntryData
{
	TupleHashEntryData shared;	/* common header for hash table entries */
	SetOpStatePerGroupData pergroup;
} SetOpHashEntryData;

static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);

/*
 * Initialize state for a new group of input values.
 */
static void
initialize_counts(SetOpStatePerGroup pergroup)
{
	pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
	if (flag)
		pergroup->numRight++;
	else
		pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	int			flag;
	bool		isNull;

	flag = DatumGetInt32(slot_getattr(inputslot,
									  node->flagColIdx,
									  &isNull));
	Assert(!isNull);
	Assert(flag == 0 || flag == 1);
	return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;

	Assert(node->strategy == SETOP_HASHED);
	Assert(node->numGroups > 0);

	setopstate->hashtable = BuildTupleHashTable(node->numCols,
												node->dupColIdx,
												setopstate->eqfunctions,
												setopstate->hashfunctions,
												node->numGroups,
												sizeof(SetOpHashEntryData),
												setopstate->tableContext,
												setopstate->tempContext);
}

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
	SetOp	   *plannode = (SetOp *) setopstate->ps.plan;

	switch (plannode->cmd)
	{
		case SETOPCMD_INTERSECT:
			if (pergroup->numLeft > 0 && pergroup->numRight > 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_INTERSECT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				pergroup->numLeft : pergroup->numRight;
			break;
		case SETOPCMD_EXCEPT:
			if (pergroup->numLeft > 0 && pergroup->numRight == 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_EXCEPT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				0 : (pergroup->numLeft - pergroup->numRight);
			break;
		default:
			elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
			break;
	}
}
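
/*
 * For example (hypothetical counts): a group with numLeft = 2 and
 * numRight = 5 yields numOutput = 0 for EXCEPT ALL (the comparison above
 * guards against a negative count) and numOutput = 2 for INTERSECT ALL.
 */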

/* ----------------------------------------------------------------
 *		ExecSetOp
 * ----------------------------------------------------------------
 */
TupleTableSlot *				/* return: a tuple or NULL */
ExecSetOp(SetOpState *node)
{
	SetOp	   *plannode = (SetOp *) node->ps.plan;
	TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

	/*
	 * If the previously-returned tuple needs to be returned more than once,
	 * keep returning it.
	 */
	if (node->numOutput > 0)
	{
		node->numOutput--;
		return resultTupleSlot;
	}

	/* Otherwise, we're done if we are out of groups */
	if (node->setop_done)
		return NULL;

	/* Fetch the next tuple group according to the correct strategy */
	if (plannode->strategy == SETOP_HASHED)
	{
		if (!node->table_filled)
			setop_fill_hash_table(node);
		return setop_retrieve_hash_table(node);
	}
	else
		return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	SetOpStatePerGroup pergroup;
	TupleTableSlot *outerslot;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	pergroup = setopstate->pergroup;
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * If we don't already have the first tuple of the new group, fetch it
		 * from the outer plan.
		 */
		if (setopstate->grp_firstTuple == NULL)
		{
			outerslot = ExecProcNode(outerPlan);
			if (!TupIsNull(outerslot))
			{
				/* Make a copy of the first input tuple */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
			}
			else
			{
				/* outer plan produced no tuples at all */
				setopstate->setop_done = true;
				return NULL;
			}
		}

		/*
		 * Store the copied first input tuple in the tuple table slot
		 * reserved for it.  The tuple will be deleted when it is cleared
		 * from the slot.
		 */
		ExecStoreTuple(setopstate->grp_firstTuple,
					   resultTupleSlot,
					   InvalidBuffer,
					   true);
		setopstate->grp_firstTuple = NULL;	/* don't keep two pointers */

		/* Initialize working state for a new input tuple group */
		initialize_counts(pergroup);

		/* Count the first input tuple */
		advance_counts(pergroup,
					   fetch_tuple_flag(setopstate, resultTupleSlot));

		/*
		 * Scan the outer plan until we exhaust it or cross a group boundary.
		 */
		for (;;)
		{
			outerslot = ExecProcNode(outerPlan);
			if (TupIsNull(outerslot))
			{
				/* no more outer-plan tuples available */
				setopstate->setop_done = true;
				break;
			}

			/*
			 * Check whether we've crossed a group boundary.
			 */
			if (!execTuplesMatch(resultTupleSlot,
								 outerslot,
								 node->numCols, node->dupColIdx,
								 setopstate->eqfunctions,
								 setopstate->tempContext))
			{
				/*
				 * Save the first input tuple of the next group.
				 */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
				break;
			}

			/* Still in same group, so count this tuple */
			advance_counts(pergroup,
						   fetch_tuple_flag(setopstate, outerslot));
		}

		/*
		 * Done scanning input tuple group.  See if we should emit any
		 * copies of result tuple, and if so return the first copy.
		 */
		set_output_count(setopstate, pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return resultTupleSlot;
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	int			firstFlag;
	bool		in_first_rel;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	firstFlag = node->firstFlag;
	/* verify planner didn't mess up */
	Assert(firstFlag == 0 ||
		   (firstFlag == 1 &&
			(node->cmd == SETOPCMD_INTERSECT ||
			 node->cmd == SETOPCMD_INTERSECT_ALL)));

	/*
	 * Process each outer-plan tuple, and then fetch the next one, until we
	 * exhaust the outer plan.
	 */
	in_first_rel = true;
	for (;;)
	{
		TupleTableSlot *outerslot;
		int			flag;
		SetOpHashEntry entry;
		bool		isnew;

		outerslot = ExecProcNode(outerPlan);
		if (TupIsNull(outerslot))
			break;

		/* Identify whether it's left or right input */
		flag = fetch_tuple_flag(setopstate, outerslot);

		if (flag == firstFlag)
		{
			/* (still) in first input relation */
			Assert(in_first_rel);

			/* Find or build hashtable entry for this tuple's group */
			entry = (SetOpHashEntry)
				LookupTupleHashEntry(setopstate->hashtable, outerslot, &isnew);

			/* If new tuple group, initialize counts */
			if (isnew)
				initialize_counts(&entry->pergroup);

			/* Advance the counts */
			advance_counts(&entry->pergroup, flag);
		}
		else
		{
			/* reached second relation */
			in_first_rel = false;

			/* For tuples not seen previously, do not make hashtable entry */
			entry = (SetOpHashEntry)
				LookupTupleHashEntry(setopstate->hashtable, outerslot, NULL);

			/* Advance the counts if entry is already present */
			if (entry)
				advance_counts(&entry->pergroup, flag);
		}

		/* Must reset temp context after each hashtable lookup */
		MemoryContextReset(setopstate->tempContext);
	}

	setopstate->table_filled = true;
	/* Initialize to walk the hash table */
	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}
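
/*
 * Why skipping hash entries for tuples seen only in the second input is
 * safe: such a group would end with a zero count for the first relation.
 * For INTERSECT and INTERSECT ALL, a zero count on either side makes
 * set_output_count() emit nothing; for EXCEPT and EXCEPT ALL, the planner
 * guarantees the first input is the left one, so numLeft == 0 likewise
 * yields numOutput == 0.
 */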

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
	SetOpHashEntry entry;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * Find the next entry in the hash table
		 */
		entry = (SetOpHashEntry) ScanTupleHashTable(&setopstate->hashiter);
		if (entry == NULL)
		{
			/* No more entries in hashtable, so done */
			setopstate->setop_done = true;
			break;
		}

		/*
		 * See if we should emit any copies of this tuple, and if so return
		 * the first copy.
		 */
		set_output_count(setopstate, &entry->pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return ExecStoreMinimalTuple(entry->shared.firstTuple,
										 resultTupleSlot,
										 false);
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
	SetOpState *setopstate;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	setopstate = makeNode(SetOpState);
	setopstate->ps.plan = (Plan *) node;
	setopstate->ps.state = estate;

	setopstate->eqfunctions = NULL;
	setopstate->hashfunctions = NULL;
	setopstate->setop_done = false;
	setopstate->numOutput = 0;
	setopstate->pergroup = NULL;
	setopstate->grp_firstTuple = NULL;
	setopstate->hashtable = NULL;
	setopstate->tableContext = NULL;

	/*
	 * Miscellaneous initialization
	 *
	 * SetOp nodes have no ExprContext initialization because they never call
	 * ExecQual or ExecProject.  But they do need a per-tuple memory context
	 * anyway for calling execTuplesMatch.
	 */
	setopstate->tempContext =
		AllocSetContextCreate(CurrentMemoryContext,
							  "SetOp",
							  ALLOCSET_DEFAULT_MINSIZE,
							  ALLOCSET_DEFAULT_INITSIZE,
							  ALLOCSET_DEFAULT_MAXSIZE);

	/*
	 * If hashing, we also need a longer-lived context to store the hash
	 * table.  The table can't just be kept in the per-query context because
	 * we want to be able to throw it away in ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		setopstate->tableContext =
			AllocSetContextCreate(CurrentMemoryContext,
								  "SetOp hash table",
								  ALLOCSET_DEFAULT_MINSIZE,
								  ALLOCSET_DEFAULT_INITSIZE,
								  ALLOCSET_DEFAULT_MAXSIZE);

#define SETOP_NSLOTS 1

	/*
	 * Tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &setopstate->ps);

	/*
	 * initialize child nodes
	 *
	 * If we are hashing then the child plan does not need
	 * to handle REWIND efficiently; see ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		eflags &= ~EXEC_FLAG_REWIND;
	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * setop nodes do no projections, so initialize projection info for this
	 * node appropriately
	 */
	ExecAssignResultTypeFromTL(&setopstate->ps);
	setopstate->ps.ps_ProjInfo = NULL;

	/*
	 * Precompute fmgr lookup data for inner loop.  We need both equality and
	 * hashing functions to do it by hashing, but only equality if not
	 * hashing.
	 */
	if (node->strategy == SETOP_HASHED)
		execTuplesHashPrepare(node->numCols,
							  node->dupOperators,
							  &setopstate->eqfunctions,
							  &setopstate->hashfunctions);
	else
		setopstate->eqfunctions =
			execTuplesMatchPrepare(node->numCols,
								   node->dupOperators);

	if (node->strategy == SETOP_HASHED)
	{
		build_hash_table(setopstate);
		setopstate->table_filled = false;
	}
	else
	{
		setopstate->pergroup =
			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
	}

	return setopstate;
}

int
ExecCountSlotsSetOp(SetOp *node)
{
	return ExecCountSlotsNode(outerPlan(node)) +
		ExecCountSlotsNode(innerPlan(node)) +
		SETOP_NSLOTS;
}

/* ----------------------------------------------------------------
 *		ExecEndSetOp
 *
 *		This shuts down the subplan and frees resources allocated
 *		to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
	/* clean up tuple table */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);

	/* free subsidiary stuff including hashtable */
	MemoryContextDelete(node->tempContext);
	if (node->tableContext)
		MemoryContextDelete(node->tableContext);

	ExecEndNode(outerPlanState(node));
}

void
ExecReScanSetOp(SetOpState *node, ExprContext *exprCtxt)
{
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo. If subnode's
		 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
		 * else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash table;
		 * no need to build it again.
		 */
		if (((PlanState *) node)->lefttree->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage */
	if (node->tableContext)
		MemoryContextResetAndDeleteChildren(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		build_hash_table(node);
		node->table_filled = false;
	}

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (((PlanState *) node)->lefttree->chgParam == NULL)
		ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
}