src/backend/executor/nodeAgg.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * nodeAgg.c
   4  *        Routines to handle aggregate nodes.
   5  *
   6  *        ExecAgg evaluates each aggregate in the following steps:
   7  *
   8  *               transvalue = initcond
   9  *               foreach input_tuple do
  10  *                      transvalue = transfunc(transvalue, input_value(s))
  11  *               result = finalfunc(transvalue)
  12  *
  13  *        If a finalfunc is not supplied then the result is just the ending
  14  *        value of transvalue.
  15  *
  16  *        If transfunc is marked "strict" in pg_proc and initcond is NULL,
  17  *        then the first non-NULL input_value is assigned directly to transvalue,
  18  *        and transfunc isn't applied until the second non-NULL input_value.
  19  *        The agg's first input type and transtype must be the same in this case!
  20  *
  21  *        If transfunc is marked "strict" then NULL input_values are skipped,
  22  *        keeping the previous transvalue.      If transfunc is not strict then it
  23  *        is called for every input tuple and must deal with NULL initcond
  24  *        or NULL input_values for itself.
  25  *
  26  *        If finalfunc is marked "strict" then it is not called when the
  27  *        ending transvalue is NULL, instead a NULL result is created
  28  *        automatically (this is just the usual handling of strict functions,
  29  *        of course).  A non-strict finalfunc can make its own choice of
  30  *        what to return for a NULL ending transvalue.
  31  *
  32  *        We compute aggregate input expressions and run the transition functions
  33  *        in a temporary econtext (aggstate->tmpcontext).  This is reset at
  34  *        least once per input tuple, so when the transvalue datatype is
  35  *        pass-by-reference, we have to be careful to copy it into a longer-lived
  36  *        memory context, and free the prior value to avoid memory leakage.
  37  *        We store transvalues in the memory context aggstate->aggcontext,
  38  *        which is also used for the hashtable structures in AGG_HASHED mode.
   39  *        The node's regular econtext (aggstate->ss.ps.ps_ExprContext)
  40  *        is used to run finalize functions and compute the output tuple;
  41  *        this context can be reset once per output tuple.
  42  *
  43  *        Beginning in PostgreSQL 8.1, the executor's AggState node is passed as
  44  *        the fmgr "context" value in all transfunc and finalfunc calls.  It is
  45  *        not really intended that the transition functions will look into the
  46  *        AggState node, but they can use code like
  47  *                      if (fcinfo->context && IsA(fcinfo->context, AggState))
  48  *        to verify that they are being called by nodeAgg.c and not as ordinary
  49  *        SQL functions.  The main reason a transition function might want to know
  50  *        that is that it can avoid palloc'ing a fixed-size pass-by-ref transition
  51  *        value on every call: it can instead just scribble on and return its left
  52  *        input.  Ordinarily it is completely forbidden for functions to modify
  53  *        pass-by-ref inputs, but in the aggregate case we know the left input is
  54  *        either the initial transition value or a previous function result, and
  55  *        in either case its value need not be preserved.  See int8inc() for an
  56  *        example.      Notice that advance_transition_function() is coded to avoid a
  57  *        data copy step when the previous transition value pointer is returned.
  58  *
  59  *
  60  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  61  * Portions Copyright (c) 1994, Regents of the University of California
  62  *
  63  * IDENTIFICATION
  64  *        $PostgreSQL$
  65  *
  66  *-------------------------------------------------------------------------
  67  */
  68
  69 #include "postgres.h"
  70
  71 #include "catalog/pg_aggregate.h"
  72 #include "catalog/pg_proc.h"
  73 #include "catalog/pg_type.h"
  74 #include "executor/executor.h"
  75 #include "executor/nodeAgg.h"
  76 #include "miscadmin.h"
  77 #include "nodes/nodeFuncs.h"
  78 #include "optimizer/clauses.h"
  79 #include "parser/parse_agg.h"
  80 #include "parser/parse_coerce.h"
  81 #include "parser/parse_oper.h"
  82 #include "utils/acl.h"
  83 #include "utils/builtins.h"
  84 #include "utils/lsyscache.h"
  85 #include "utils/memutils.h"
  86 #include "utils/syscache.h"
  87 #include "utils/tuplesort.h"
  88 #include "utils/datum.h"
  89
  90
  91 /*
  92  * AggStatePerAggData - per-aggregate working state for the Agg scan
  93  */
  94 typedef struct AggStatePerAggData
  95 {
  96         /*
  97          * These values are set up during ExecInitAgg() and do not change
  98          * thereafter:
  99          */
 100
 101         /* Links to Aggref expr and state nodes this working state is for */
 102         AggrefExprState *aggrefstate;
 103         Aggref     *aggref;
 104
 105         /* number of input arguments for aggregate */
 106         int                     numArguments;
 107
 108         /* Oids of transfer functions */
 109         Oid                     transfn_oid;
 110         Oid                     finalfn_oid;    /* may be InvalidOid */
 111
 112         /*
 113          * fmgr lookup data for transfer functions --- only valid when
 114          * corresponding oid is not InvalidOid.  Note in particular that fn_strict
 115          * flags are kept here.
 116          */
 117         FmgrInfo        transfn;
 118         FmgrInfo        finalfn;
 119
 120         /*
 121          * Type of input data and Oid of sort operator to use for it; only
 122          * set/used when aggregate has DISTINCT flag.  (These are not used
 123          * directly by nodeAgg, but must be passed to the Tuplesort object.)
 124          */
 125         Oid                     inputType;
 126         Oid                     sortOperator;
 127
 128         /*
 129          * fmgr lookup data for input type's equality operator --- only set/used
 130          * when aggregate has DISTINCT flag.
 131          */
 132         FmgrInfo        equalfn;
 133
 134         /*
 135          * initial value from pg_aggregate entry
 136          */
 137         Datum           initValue;
 138         bool            initValueIsNull;
 139
 140         /*
 141          * We need the len and byval info for the agg's input, result, and
 142          * transition data types in order to know how to copy/delete values.
 143          */
 144         int16           inputtypeLen,
 145                                 resulttypeLen,
 146                                 transtypeLen;
 147         bool            inputtypeByVal,
 148                                 resulttypeByVal,
 149                                 transtypeByVal;
 150
 151         /*
 152          * These values are working state that is initialized at the start of an
 153          * input tuple group and updated for each input tuple.
 154          *
 155          * For a simple (non DISTINCT) aggregate, we just feed the input values
 156          * straight to the transition function.  If it's DISTINCT, we pass the
 157          * input values into a Tuplesort object; then at completion of the input
 158          * tuple group, we scan the sorted values, eliminate duplicates, and run
 159          * the transition function on the rest.
 160          */
 161
 162         Tuplesortstate *sortstate;      /* sort object, if a DISTINCT agg */
 163 } AggStatePerAggData;
 164
 165 /*
 166  * AggStatePerGroupData - per-aggregate-per-group working state
 167  *
 168  * These values are working state that is initialized at the start of
 169  * an input tuple group and updated for each input tuple.
 170  *
 171  * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these
 172  * structs (pointed to by aggstate->pergroup); we re-use the array for
 173  * each input group, if it's AGG_SORTED mode.  In AGG_HASHED mode, the
 174  * hash table contains an array of these structs for each tuple group.
 175  *
 176  * Logically, the sortstate field belongs in this struct, but we do not
 177  * keep it here for space reasons: we don't support DISTINCT aggregates
 178  * in AGG_HASHED mode, so there's no reason to use up a pointer field
 179  * in every entry of the hashtable.
 180  */
 181 typedef struct AggStatePerGroupData
 182 {
 183         Datum           transValue;             /* current transition value */
 184         bool            transValueIsNull;
 185
 186         bool            noTransValue;   /* true if transValue not set yet */
 187
 188         /*
 189          * Note: noTransValue initially has the same value as transValueIsNull,
 190          * and if true both are cleared to false at the same time.      They are not
 191          * the same though: if transfn later returns a NULL, we want to keep that
 192          * NULL and not auto-replace it with a later input value. Only the first
 193          * non-NULL input will be auto-substituted.
 194          */
 195 } AggStatePerGroupData;
 196
 197 /*
 198  * To implement hashed aggregation, we need a hashtable that stores a
 199  * representative tuple and an array of AggStatePerGroup structs for each
 200  * distinct set of GROUP BY column values.      We compute the hash key from
 201  * the GROUP BY columns.
 202  */
 203 typedef struct AggHashEntryData *AggHashEntry;
 204
 205 typedef struct AggHashEntryData
 206 {
 207         TupleHashEntryData shared;      /* common header for hash table entries */
 208         /* per-aggregate transition status array - must be last! */
 209         AggStatePerGroupData pergroup[1];       /* VARIABLE LENGTH ARRAY */
 210 } AggHashEntryData;                             /* VARIABLE LENGTH STRUCT */
 211
 212
 213 static void initialize_aggregates(AggState *aggstate,
 214                                           AggStatePerAgg peragg,
 215                                           AggStatePerGroup pergroup);
 216 static void advance_transition_function(AggState *aggstate,
 217                                                         AggStatePerAgg peraggstate,
 218                                                         AggStatePerGroup pergroupstate,
 219                                                         FunctionCallInfoData *fcinfo);
 220 static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
 221 static void process_sorted_aggregate(AggState *aggstate,
 222                                                  AggStatePerAgg peraggstate,
 223                                                  AggStatePerGroup pergroupstate);
 224 static void finalize_aggregate(AggState *aggstate,
 225                                    AggStatePerAgg peraggstate,
 226                                    AggStatePerGroup pergroupstate,
 227                                    Datum *resultVal, bool *resultIsNull);
 228 static Bitmapset *find_unaggregated_cols(AggState *aggstate);
 229 static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
 230 static void build_hash_table(AggState *aggstate);
 231 static AggHashEntry lookup_hash_entry(AggState *aggstate,
 232                                   TupleTableSlot *inputslot);
 233 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 234 static void agg_fill_hash_table(AggState *aggstate);
 235 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
 236 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
 237
 238
 239 /*
 240  * Initialize all aggregates for a new group of input values.
 241  *
 242  * When called, CurrentMemoryContext should be the per-query context.
 243  */
 244 static void
 245 initialize_aggregates(AggState *aggstate,
 246                                           AggStatePerAgg peragg,
 247                                           AggStatePerGroup pergroup)
 248 {
 249         int                     aggno;
 250
 251         for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 252         {
 253                 AggStatePerAgg peraggstate = &peragg[aggno];
 254                 AggStatePerGroup pergroupstate = &pergroup[aggno];
 255                 Aggref     *aggref = peraggstate->aggref;
 256
 257                 /*
 258                  * Start a fresh sort operation for each DISTINCT aggregate.
 259                  */
 260                 if (aggref->aggdistinct)
 261                 {
 262                         /*
 263                          * In case of rescan, maybe there could be an uncompleted sort
 264                          * operation?  Clean it up if so.
 265                          */
 266                         if (peraggstate->sortstate)
 267                                 tuplesort_end(peraggstate->sortstate);
 268
 269                         peraggstate->sortstate =
 270                                 tuplesort_begin_datum(peraggstate->inputType,
 271                                                                           peraggstate->sortOperator, false,
 272                                                                           work_mem, false);
 273                 }
 274
 275                 /*
 276                  * If we are reinitializing after a group boundary, we have to free
 277                  * any prior transValue to avoid memory leakage.  We must check not
 278                  * only the isnull flag but whether the pointer is NULL; since
 279                  * pergroupstate is initialized with palloc0, the initial condition
 280                  * has isnull = 0 and null pointer.
 281                  */
 282                 if (!peraggstate->transtypeByVal &&
 283                         !pergroupstate->transValueIsNull &&
 284                         DatumGetPointer(pergroupstate->transValue) != NULL)
 285                         pfree(DatumGetPointer(pergroupstate->transValue));
 286
 287                 /*
 288                  * (Re)set transValue to the initial value.
 289                  *
 290                  * Note that when the initial value is pass-by-ref, we must copy it
 291                  * (into the aggcontext) since we will pfree the transValue later.
 292                  */
 293                 if (peraggstate->initValueIsNull)
 294                         pergroupstate->transValue = peraggstate->initValue;
 295                 else
 296                 {
 297                         MemoryContext oldContext;
 298
 299                         oldContext = MemoryContextSwitchTo(aggstate->aggcontext);
 300                         pergroupstate->transValue = datumCopy(peraggstate->initValue,
 301                                                                                                   peraggstate->transtypeByVal,
 302                                                                                                   peraggstate->transtypeLen);
 303                         MemoryContextSwitchTo(oldContext);
 304                 }
 305                 pergroupstate->transValueIsNull = peraggstate->initValueIsNull;
 306
 307                 /*
 308                  * If the initial value for the transition state doesn't exist in the
 309                  * pg_aggregate table then we will let the first non-NULL value
 310                  * returned from the outer procNode become the initial value. (This is
 311                  * useful for aggregates like max() and min().) The noTransValue flag
 312                  * signals that we still need to do this.
 313                  */
 314                 pergroupstate->noTransValue = peraggstate->initValueIsNull;
 315         }
 316 }
 317
 318 /*
 319  * Given new input value(s), advance the transition function of an aggregate.
 320  *
 321  * The new values (and null flags) have been preloaded into argument positions
 322  * 1 and up in fcinfo, so that we needn't copy them again to pass to the
 323  * transition function.  No other fields of fcinfo are assumed valid.
 324  *
 325  * It doesn't matter which memory context this is called in.
 326  */
 327 static void
 328 advance_transition_function(AggState *aggstate,
 329                                                         AggStatePerAgg peraggstate,
 330                                                         AggStatePerGroup pergroupstate,
 331                                                         FunctionCallInfoData *fcinfo)
 332 {
 333         int                     numArguments = peraggstate->numArguments;
 334         MemoryContext oldContext;
 335         Datum           newVal;
 336         int                     i;
 337
 338         if (peraggstate->transfn.fn_strict)
 339         {
 340                 /*
 341                  * For a strict transfn, nothing happens when there's a NULL input; we
 342                  * just keep the prior transValue.
 343                  */
 344                 for (i = 1; i <= numArguments; i++)
 345                 {
 346                         if (fcinfo->argnull[i])
 347                                 return;
 348                 }
 349                 if (pergroupstate->noTransValue)
 350                 {
 351                         /*
 352                          * transValue has not been initialized. This is the first non-NULL
 353                          * input value. We use it as the initial value for transValue. (We
 354                          * already checked that the agg's input type is binary-compatible
 355                          * with its transtype, so straight copy here is OK.)
 356                          *
 357                          * We must copy the datum into aggcontext if it is pass-by-ref. We
 358                          * do not need to pfree the old transValue, since it's NULL.
 359                          */
 360                         oldContext = MemoryContextSwitchTo(aggstate->aggcontext);
 361                         pergroupstate->transValue = datumCopy(fcinfo->arg[1],
 362                                                                                                   peraggstate->transtypeByVal,
 363                                                                                                   peraggstate->transtypeLen);
 364                         pergroupstate->transValueIsNull = false;
 365                         pergroupstate->noTransValue = false;
 366                         MemoryContextSwitchTo(oldContext);
 367                         return;
 368                 }
 369                 if (pergroupstate->transValueIsNull)
 370                 {
 371                         /*
 372                          * Don't call a strict function with NULL inputs.  Note it is
 373                          * possible to get here despite the above tests, if the transfn is
 374                          * strict *and* returned a NULL on a prior cycle. If that happens
 375                          * we will propagate the NULL all the way to the end.
 376                          */
 377                         return;
 378                 }
 379         }
 380
 381         /* We run the transition functions in per-input-tuple memory context */
 382         oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory);
 383
 384         /*
 385          * OK to call the transition function
 386          */
 387         InitFunctionCallInfoData(*fcinfo, &(peraggstate->transfn),
 388                                                          numArguments + 1,
 389                                                          (void *) aggstate, NULL);
 390         fcinfo->arg[0] = pergroupstate->transValue;
 391         fcinfo->argnull[0] = pergroupstate->transValueIsNull;
 392
 393         newVal = FunctionCallInvoke(fcinfo);
 394
 395         /*
 396          * If pass-by-ref datatype, must copy the new value into aggcontext and
 397          * pfree the prior transValue.  But if transfn returned a pointer to its
 398          * first input, we don't need to do anything.
 399          */
 400         if (!peraggstate->transtypeByVal &&
 401                 DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue))
 402         {
 403                 if (!fcinfo->isnull)
 404                 {
 405                         MemoryContextSwitchTo(aggstate->aggcontext);
 406                         newVal = datumCopy(newVal,
 407                                                            peraggstate->transtypeByVal,
 408                                                            peraggstate->transtypeLen);
 409                 }
 410                 if (!pergroupstate->transValueIsNull)
 411                         pfree(DatumGetPointer(pergroupstate->transValue));
 412         }
 413
 414         pergroupstate->transValue = newVal;
 415         pergroupstate->transValueIsNull = fcinfo->isnull;
 416
 417         MemoryContextSwitchTo(oldContext);
 418 }
 419
 420 /*
 421  * Advance all the aggregates for one input tuple.      The input tuple
 422  * has been stored in tmpcontext->ecxt_outertuple, so that it is accessible
 423  * to ExecEvalExpr.  pergroup is the array of per-group structs to use
 424  * (this might be in a hashtable entry).
 425  *
 426  * When called, CurrentMemoryContext should be the per-query context.
 427  */
 428 static void
 429 advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 430 {
 431         ExprContext *econtext = aggstate->tmpcontext;
 432         int                     aggno;
 433
 434         for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 435         {
 436                 AggStatePerAgg peraggstate = &aggstate->peragg[aggno];
 437                 AggStatePerGroup pergroupstate = &pergroup[aggno];
 438                 AggrefExprState *aggrefstate = peraggstate->aggrefstate;
 439                 Aggref     *aggref = peraggstate->aggref;
 440                 FunctionCallInfoData fcinfo;
 441                 int                     i;
 442                 ListCell   *arg;
 443                 MemoryContext oldContext;
 444
 445                 /* Switch memory context just once for all args */
 446                 oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
 447
 448                 /* Evaluate inputs and save in fcinfo */
 449                 /* We start from 1, since the 0th arg will be the transition value */
 450                 i = 1;
 451                 foreach(arg, aggrefstate->args)
 452                 {
 453                         ExprState  *argstate = (ExprState *) lfirst(arg);
 454
 455                         fcinfo.arg[i] = ExecEvalExpr(argstate, econtext,
 456                                                                                  fcinfo.argnull + i, NULL);
 457                         i++;
 458                 }
 459
 460                 /* Switch back */
 461                 MemoryContextSwitchTo(oldContext);
 462
 463                 if (aggref->aggdistinct)
 464                 {
 465                         /* in DISTINCT mode, we may ignore nulls */
 466                         /* XXX we assume there is only one input column */
 467                         if (fcinfo.argnull[1])
 468                                 continue;
 469                         tuplesort_putdatum(peraggstate->sortstate, fcinfo.arg[1],
 470                                                            fcinfo.argnull[1]);
 471                 }
 472                 else
 473                 {
 474                         advance_transition_function(aggstate, peraggstate, pergroupstate,
 475                                                                                 &fcinfo);
 476                 }
 477         }
 478 }
 479
 480 /*
 481  * Run the transition function for a DISTINCT aggregate.  This is called
 482  * after we have completed entering all the input values into the sort
 483  * object.      We complete the sort, read out the values in sorted order,
 484  * and run the transition function on each non-duplicate value.
 485  *
 486  * When called, CurrentMemoryContext should be the per-query context.
 487  */
 488 static void
 489 process_sorted_aggregate(AggState *aggstate,
 490                                                  AggStatePerAgg peraggstate,
 491                                                  AggStatePerGroup pergroupstate)
 492 {
 493         Datum           oldVal = (Datum) 0;
 494         bool            haveOldVal = false;
 495         MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
 496         MemoryContext oldContext;
 497         Datum      *newVal;
 498         bool       *isNull;
 499         FunctionCallInfoData fcinfo;
 500
 501         tuplesort_performsort(peraggstate->sortstate);
 502
 503         newVal = fcinfo.arg + 1;
 504         isNull = fcinfo.argnull + 1;
 505
 506         /*
 507          * Note: if input type is pass-by-ref, the datums returned by the sort are
 508          * freshly palloc'd in the per-query context, so we must be careful to
 509          * pfree them when they are no longer needed.
 510          */
 511
 512         while (tuplesort_getdatum(peraggstate->sortstate, true,
 513                                                           newVal, isNull))
 514         {
 515                 /*
 516                  * DISTINCT always suppresses nulls, per SQL spec, regardless of the
 517                  * transition function's strictness.
 518                  */
 519                 if (*isNull)
 520                         continue;
 521
 522                 /*
 523                  * Clear and select the working context for evaluation of the equality
 524                  * function and transition function.
 525                  */
 526                 MemoryContextReset(workcontext);
 527                 oldContext = MemoryContextSwitchTo(workcontext);
 528
 529                 if (haveOldVal &&
 530                         DatumGetBool(FunctionCall2(&peraggstate->equalfn,
 531                                                                            oldVal, *newVal)))
 532                 {
 533                         /* equal to prior, so forget this one */
 534                         if (!peraggstate->inputtypeByVal)
 535                                 pfree(DatumGetPointer(*newVal));
 536                 }
 537                 else
 538                 {
 539                         advance_transition_function(aggstate, peraggstate, pergroupstate,
 540                                                                                 &fcinfo);
 541                         /* forget the old value, if any */
 542                         if (haveOldVal && !peraggstate->inputtypeByVal)
 543                                 pfree(DatumGetPointer(oldVal));
 544                         /* and remember the new one for subsequent equality checks */
 545                         oldVal = *newVal;
 546                         haveOldVal = true;
 547                 }
 548
 549                 MemoryContextSwitchTo(oldContext);
 550         }
 551
 552         if (haveOldVal && !peraggstate->inputtypeByVal)
 553                 pfree(DatumGetPointer(oldVal));
 554
 555         tuplesort_end(peraggstate->sortstate);
 556         peraggstate->sortstate = NULL;
 557 }
 558
 559 /*
 560  * Compute the final value of one aggregate.
 561  *
 562  * The finalfunction will be run, and the result delivered, in the
 563  * output-tuple context; caller's CurrentMemoryContext does not matter.
 564  */
 565 static void
 566 finalize_aggregate(AggState *aggstate,
 567                                    AggStatePerAgg peraggstate,
 568                                    AggStatePerGroup pergroupstate,
 569                                    Datum *resultVal, bool *resultIsNull)
 570 {
 571         MemoryContext oldContext;
 572
 573         oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
 574
 575         /*
 576          * Apply the agg's finalfn if one is provided, else return transValue.
 577          */
 578         if (OidIsValid(peraggstate->finalfn_oid))
 579         {
 580                 FunctionCallInfoData fcinfo;
 581
 582                 InitFunctionCallInfoData(fcinfo, &(peraggstate->finalfn), 1,
 583                                                                  (void *) aggstate, NULL);
 584                 fcinfo.arg[0] = pergroupstate->transValue;
 585                 fcinfo.argnull[0] = pergroupstate->transValueIsNull;
 586                 if (fcinfo.flinfo->fn_strict && pergroupstate->transValueIsNull)
 587                 {
 588                         /* don't call a strict function with NULL inputs */
 589                         *resultVal = (Datum) 0;
 590                         *resultIsNull = true;
 591                 }
 592                 else
 593                 {
 594                         *resultVal = FunctionCallInvoke(&fcinfo);
 595                         *resultIsNull = fcinfo.isnull;
 596                 }
 597         }
 598         else
 599         {
 600                 *resultVal = pergroupstate->transValue;
 601                 *resultIsNull = pergroupstate->transValueIsNull;
 602         }
 603
 604         /*
 605          * If result is pass-by-ref, make sure it is in the right context.
 606          */
 607         if (!peraggstate->resulttypeByVal && !*resultIsNull &&
 608                 !MemoryContextContains(CurrentMemoryContext,
 609                                                            DatumGetPointer(*resultVal)))
 610                 *resultVal = datumCopy(*resultVal,
 611                                                            peraggstate->resulttypeByVal,
 612                                                            peraggstate->resulttypeLen);
 613
 614         MemoryContextSwitchTo(oldContext);
 615 }
 616
 617 /*
 618  * find_unaggregated_cols
 619  *        Construct a bitmapset of the column numbers of un-aggregated Vars
 620  *        appearing in our targetlist and qual (HAVING clause)
 621  */
 622 static Bitmapset *
 623 find_unaggregated_cols(AggState *aggstate)
 624 {
 625         Agg                *node = (Agg *) aggstate->ss.ps.plan;
 626         Bitmapset  *colnos;
 627
 628         colnos = NULL;
 629         (void) find_unaggregated_cols_walker((Node *) node->plan.targetlist,
 630                                                                                  &colnos);
 631         (void) find_unaggregated_cols_walker((Node *) node->plan.qual,
 632                                                                                  &colnos);
 633         return colnos;
 634 }
 635
 636 static bool
 637 find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
 638 {
 639         if (node == NULL)
 640                 return false;
 641         if (IsA(node, Var))
 642         {
 643                 Var                *var = (Var *) node;
 644
 645                 /* setrefs.c should have set the varno to OUTER */
 646                 Assert(var->varno == OUTER);
 647                 Assert(var->varlevelsup == 0);
 648                 *colnos = bms_add_member(*colnos, var->varattno);
 649                 return false;
 650         }
 651         if (IsA(node, Aggref))          /* do not descend into aggregate exprs */
 652                 return false;
 653         return expression_tree_walker(node, find_unaggregated_cols_walker,
 654                                                                   (void *) colnos);
 655 }
 656
 657 /*
 658  * Initialize the hash table to empty.
 659  *
 660  * The hash table always lives in the aggcontext memory context.
 661  */
 662 static void
 663 build_hash_table(AggState *aggstate)
 664 {
 665         Agg                *node = (Agg *) aggstate->ss.ps.plan;
 666         MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
 667         Size            entrysize;
 668
 669         Assert(node->aggstrategy == AGG_HASHED);
 670         Assert(node->numGroups > 0);
 671
 672         entrysize = sizeof(AggHashEntryData) +
 673                 (aggstate->numaggs - 1) *sizeof(AggStatePerGroupData);
 674
 675         aggstate->hashtable = BuildTupleHashTable(node->numCols,
 676                                                                                           node->grpColIdx,
 677                                                                                           aggstate->eqfunctions,
 678                                                                                           aggstate->hashfunctions,
 679                                                                                           node->numGroups,
 680                                                                                           entrysize,
 681                                                                                           aggstate->aggcontext,
 682                                                                                           tmpmem);
 683 }
 684
 685 /*
 686  * Create a list of the tuple columns that actually need to be stored in
 687  * hashtable entries.  The incoming tuples from the child plan node will
 688  * contain grouping columns, other columns referenced in our targetlist and
 689  * qual, columns used to compute the aggregate functions, and perhaps just
 690  * junk columns we don't use at all.  Only columns of the first two types
 691  * need to be stored in the hashtable, and getting rid of the others can
 692  * make the table entries significantly smaller.  To avoid messing up Var
 693  * numbering, we keep the same tuple descriptor for hashtable entries as the
 694  * incoming tuples have, but set unwanted columns to NULL in the tuples that
 695  * go into the table.
 696  *
 697  * To eliminate duplicates, we build a bitmapset of the needed columns, then
 698  * convert it to an integer list (cheaper to scan at runtime). The list is
 699  * in decreasing order so that the first entry is the largest;
 700  * lookup_hash_entry depends on this to use slot_getsomeattrs correctly.
 701  * Note that the list is preserved over ExecReScanAgg, so we allocate it in
 702  * the per-query context (unlike the hash table itself).
 703  *
 704  * Note: at present, searching the tlist/qual is not really necessary since
 705  * the parser should disallow any unaggregated references to ungrouped
 706  * columns.  However, the search will be needed when we add support for
 707  * SQL99 semantics that allow use of "functionally dependent" columns that
 708  * haven't been explicitly grouped by.
 709  */
 710 static List *
 711 find_hash_columns(AggState *aggstate)
 712 {
 713         Agg                *node = (Agg *) aggstate->ss.ps.plan;
 714         Bitmapset  *colnos;
 715         List       *collist;
 716         int                     i;
 717
 718         /* Find Vars that will be needed in tlist and qual */
 719         colnos = find_unaggregated_cols(aggstate);
 720         /* Add in all the grouping columns */
 721         for (i = 0; i < node->numCols; i++)
 722                 colnos = bms_add_member(colnos, node->grpColIdx[i]);
 723         /* Convert to list, using lcons so largest element ends up first */
 724         collist = NIL;
 725         while ((i = bms_first_member(colnos)) >= 0)
 726                 collist = lcons_int(i, collist);
 727         bms_free(colnos);
 728
 729         return collist;
 730 }
 731
 732 /*
 733  * Estimate per-hash-table-entry overhead for the planner.
 734  *
 735  * Note that the estimate does not include space for pass-by-reference
 736  * transition data values, nor for the representative tuple of each group.
 737  */
 738 Size
 739 hash_agg_entry_size(int numAggs)
 740 {
 741         Size            entrysize;
 742
 743         /* This must match build_hash_table */
 744         entrysize = sizeof(AggHashEntryData) +
 745                 (numAggs - 1) *sizeof(AggStatePerGroupData);
 746         entrysize = MAXALIGN(entrysize);
 747         /* Account for hashtable overhead (assuming fill factor = 1) */
 748         entrysize += 3 * sizeof(void *);
 749         return entrysize;
 750 }
 751
 752 /*
 753  * Find or create a hashtable entry for the tuple group containing the
 754  * given tuple.
 755  *
 756  * When called, CurrentMemoryContext should be the per-query context.
 757  */
 758 static AggHashEntry
 759 lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
 760 {
 761         TupleTableSlot *hashslot = aggstate->hashslot;
 762         ListCell   *l;
 763         AggHashEntry entry;
 764         bool            isnew;
 765
 766         /* if first time through, initialize hashslot by cloning input slot */
 767         if (hashslot->tts_tupleDescriptor == NULL)
 768         {
 769                 ExecSetSlotDescriptor(hashslot, inputslot->tts_tupleDescriptor);
 770                 /* Make sure all unused columns are NULLs */
 771                 ExecStoreAllNullTuple(hashslot);
 772         }
 773
 774         /* transfer just the needed columns into hashslot */
 775         slot_getsomeattrs(inputslot, linitial_int(aggstate->hash_needed));
 776         foreach(l, aggstate->hash_needed)
 777         {
 778                 int                     varNumber = lfirst_int(l) - 1;
 779
 780                 hashslot->tts_values[varNumber] = inputslot->tts_values[varNumber];
 781                 hashslot->tts_isnull[varNumber] = inputslot->tts_isnull[varNumber];
 782         }
 783
 784         /* find or create the hashtable entry using the filtered tuple */
 785         entry = (AggHashEntry) LookupTupleHashEntry(aggstate->hashtable,
 786                                                                                                 hashslot,
 787                                                                                                 &isnew);
 788
 789         if (isnew)
 790         {
 791                 /* initialize aggregates for new tuple group */
 792                 initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
 793         }
 794
 795         return entry;
 796 }
 797
 798 /*
 799  * ExecAgg -
 800  *
 801  *        ExecAgg receives tuples from its outer subplan and aggregates over
 802  *        the appropriate attribute for each aggregate function use (Aggref
 803  *        node) appearing in the targetlist or qual of the node.  The number
 804  *        of tuples to aggregate over depends on whether grouped or plain
 805  *        aggregation is selected.      In grouped aggregation, we produce a result
 806  *        row for each group; in plain aggregation there's a single result row
 807  *        for the whole query.  In either case, the value of each aggregate is
 808  *        stored in the expression context to be used when ExecProject evaluates
 809  *        the result tuple.
 810  */
 811 TupleTableSlot *
 812 ExecAgg(AggState *node)
 813 {
 814         if (node->agg_done)
 815                 return NULL;
 816
 817         /*
 818          * Check to see if we're still projecting out tuples from a previous agg
 819          * tuple (because there is a function-returning-set in the projection
 820          * expressions).  If so, try to project another one.
 821          */
 822         if (node->ss.ps.ps_TupFromTlist)
 823         {
 824                 TupleTableSlot *result;
 825                 ExprDoneCond isDone;
 826
 827                 result = ExecProject(node->ss.ps.ps_ProjInfo, &isDone);
 828                 if (isDone == ExprMultipleResult)
 829                         return result;
 830                 /* Done with that source tuple... */
 831                 node->ss.ps.ps_TupFromTlist = false;
 832         }
 833
 834         if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
 835         {
 836                 if (!node->table_filled)
 837                         agg_fill_hash_table(node);
 838                 return agg_retrieve_hash_table(node);
 839         }
 840         else
 841                 return agg_retrieve_direct(node);
 842 }
 843
 844 /*
 845  * ExecAgg for non-hashed case
 846  */
 847 static TupleTableSlot *
 848 agg_retrieve_direct(AggState *aggstate)
 849 {
 850         Agg                *node = (Agg *) aggstate->ss.ps.plan;
 851         PlanState  *outerPlan;
 852         ExprContext *econtext;
 853         ExprContext *tmpcontext;
 854         Datum      *aggvalues;
 855         bool       *aggnulls;
 856         AggStatePerAgg peragg;
 857         AggStatePerGroup pergroup;
 858         TupleTableSlot *outerslot;
 859         TupleTableSlot *firstSlot;
 860         int                     aggno;
 861
 862         /*
 863          * get state info from node
 864          */
 865         outerPlan = outerPlanState(aggstate);
 866         /* econtext is the per-output-tuple expression context */
 867         econtext = aggstate->ss.ps.ps_ExprContext;
 868         aggvalues = econtext->ecxt_aggvalues;
 869         aggnulls = econtext->ecxt_aggnulls;
 870         /* tmpcontext is the per-input-tuple expression context */
 871         tmpcontext = aggstate->tmpcontext;
 872         peragg = aggstate->peragg;
 873         pergroup = aggstate->pergroup;
 874         firstSlot = aggstate->ss.ss_ScanTupleSlot;
 875
 876         /*
 877          * We loop retrieving groups until we find one matching
 878          * aggstate->ss.ps.qual
 879          */
 880         while (!aggstate->agg_done)
 881         {
 882                 /*
 883                  * If we don't already have the first tuple of the new group, fetch it
 884                  * from the outer plan.
 885                  */
 886                 if (aggstate->grp_firstTuple == NULL)
 887                 {
 888                         outerslot = ExecProcNode(outerPlan);
 889                         if (!TupIsNull(outerslot))
 890                         {
 891                                 /*
 892                                  * Make a copy of the first input tuple; we will use this for
 893                                  * comparisons (in group mode) and for projection.
 894                                  */
 895                                 aggstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
 896                         }
 897                         else
 898                         {
 899                                 /* outer plan produced no tuples at all */
 900                                 aggstate->agg_done = true;
 901                                 /* If we are grouping, we should produce no tuples too */
 902                                 if (node->aggstrategy != AGG_PLAIN)
 903                                         return NULL;
 904                         }
 905                 }
 906
 907                 /*
 908                  * Clear the per-output-tuple context for each group
 909                  */
 910                 ResetExprContext(econtext);
 911
 912                 /*
 913                  * Initialize working state for a new input tuple group
 914                  */
 915                 initialize_aggregates(aggstate, peragg, pergroup);
 916
 917                 if (aggstate->grp_firstTuple != NULL)
 918                 {
 919                         /*
 920                          * Store the copied first input tuple in the tuple table slot
 921                          * reserved for it.  The tuple will be deleted when it is cleared
 922                          * from the slot.
 923                          */
 924                         ExecStoreTuple(aggstate->grp_firstTuple,
 925                                                    firstSlot,
 926                                                    InvalidBuffer,
 927                                                    true);
 928                         aggstate->grp_firstTuple = NULL;        /* don't keep two pointers */
 929
 930                         /* set up for first advance_aggregates call */
 931                         tmpcontext->ecxt_outertuple = firstSlot;
 932
 933                         /*
 934                          * Process each outer-plan tuple, and then fetch the next one,
 935                          * until we exhaust the outer plan or cross a group boundary.
 936                          */
 937                         for (;;)
 938                         {
 939                                 advance_aggregates(aggstate, pergroup);
 940
 941                                 /* Reset per-input-tuple context after each tuple */
 942                                 ResetExprContext(tmpcontext);
 943
 944                                 outerslot = ExecProcNode(outerPlan);
 945                                 if (TupIsNull(outerslot))
 946                                 {
 947                                         /* no more outer-plan tuples available */
 948                                         aggstate->agg_done = true;
 949                                         break;
 950                                 }
 951                                 /* set up for next advance_aggregates call */
 952                                 tmpcontext->ecxt_outertuple = outerslot;
 953
 954                                 /*
 955                                  * If we are grouping, check whether we've crossed a group
 956                                  * boundary.
 957                                  */
 958                                 if (node->aggstrategy == AGG_SORTED)
 959                                 {
 960                                         if (!execTuplesMatch(firstSlot,
 961                                                                                  outerslot,
 962                                                                                  node->numCols, node->grpColIdx,
 963                                                                                  aggstate->eqfunctions,
 964                                                                                  tmpcontext->ecxt_per_tuple_memory))
 965                                         {
 966                                                 /*
 967                                                  * Save the first input tuple of the next group.
 968                                                  */
 969                                                 aggstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
 970                                                 break;
 971                                         }
 972                                 }
 973                         }
 974                 }
 975
 976                 /*
 977                  * Done scanning input tuple group. Finalize each aggregate
 978                  * calculation, and stash results in the per-output-tuple context.
 979                  */
 980                 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 981                 {
 982                         AggStatePerAgg peraggstate = &peragg[aggno];
 983                         AggStatePerGroup pergroupstate = &pergroup[aggno];
 984
 985                         if (peraggstate->aggref->aggdistinct)
 986                                 process_sorted_aggregate(aggstate, peraggstate, pergroupstate);
 987
 988                         finalize_aggregate(aggstate, peraggstate, pergroupstate,
 989                                                            &aggvalues[aggno], &aggnulls[aggno]);
 990                 }
 991
 992                 /*
 993                  * Use the representative input tuple for any references to
 994                  * non-aggregated input columns in the qual and tlist.  (If we are not
 995                  * grouping, and there are no input rows at all, we will come here
 996                  * with an empty firstSlot ... but if not grouping, there can't be any
 997                  * references to non-aggregated input columns, so no problem.)
 998                  */
 999                 econtext->ecxt_outertuple = firstSlot;
1000
1001                 /*
1002                  * Check the qual (HAVING clause); if the group does not match, ignore
1003                  * it and loop back to try to process another group.
1004                  */
1005                 if (ExecQual(aggstate->ss.ps.qual, econtext, false))
1006                 {
1007                         /*
1008                          * Form and return a projection tuple using the aggregate results
1009                          * and the representative input tuple.
1010                          */
1011                         TupleTableSlot *result;
1012                         ExprDoneCond isDone;
1013
1014                         result = ExecProject(aggstate->ss.ps.ps_ProjInfo, &isDone);
1015
1016                         if (isDone != ExprEndResult)
1017                         {
1018                                 aggstate->ss.ps.ps_TupFromTlist =
1019                                         (isDone == ExprMultipleResult);
1020                                 return result;
1021                         }
1022                 }
1023         }
1024
1025         /* No more groups */
1026         return NULL;
1027 }
1028
1029 /*
1030  * ExecAgg for hashed case: phase 1, read input and build hash table
1031  */
1032 static void
1033 agg_fill_hash_table(AggState *aggstate)
1034 {
1035         PlanState  *outerPlan;
1036         ExprContext *tmpcontext;
1037         AggHashEntry entry;
1038         TupleTableSlot *outerslot;
1039
1040         /*
1041          * get state info from node
1042          */
1043         outerPlan = outerPlanState(aggstate);
1044         /* tmpcontext is the per-input-tuple expression context */
1045         tmpcontext = aggstate->tmpcontext;
1046
1047         /*
1048          * Process each outer-plan tuple, and then fetch the next one, until we
1049          * exhaust the outer plan.
1050          */
1051         for (;;)
1052         {
1053                 outerslot = ExecProcNode(outerPlan);
1054                 if (TupIsNull(outerslot))
1055                         break;
1056                 /* set up for advance_aggregates call */
1057                 tmpcontext->ecxt_outertuple = outerslot;
1058
1059                 /* Find or build hashtable entry for this tuple's group */
1060                 entry = lookup_hash_entry(aggstate, outerslot);
1061
1062                 /* Advance the aggregates */
1063                 advance_aggregates(aggstate, entry->pergroup);
1064
1065                 /* Reset per-input-tuple context after each tuple */
1066                 ResetExprContext(tmpcontext);
1067         }
1068
1069         aggstate->table_filled = true;
1070         /* Initialize to walk the hash table */
1071         ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
1072 }
1073
1074 /*
1075  * ExecAgg for hashed case: phase 2, retrieving groups from hash table
1076  */
1077 static TupleTableSlot *
1078 agg_retrieve_hash_table(AggState *aggstate)
1079 {
1080         ExprContext *econtext;
1081         Datum      *aggvalues;
1082         bool       *aggnulls;
1083         AggStatePerAgg peragg;
1084         AggStatePerGroup pergroup;
1085         AggHashEntry entry;
1086         TupleTableSlot *firstSlot;
1087         int                     aggno;
1088
1089         /*
1090          * get state info from node
1091          */
1092         /* econtext is the per-output-tuple expression context */
1093         econtext = aggstate->ss.ps.ps_ExprContext;
1094         aggvalues = econtext->ecxt_aggvalues;
1095         aggnulls = econtext->ecxt_aggnulls;
1096         peragg = aggstate->peragg;
1097         firstSlot = aggstate->ss.ss_ScanTupleSlot;
1098
1099         /*
1100          * We loop retrieving groups until we find one satisfying
1101          * aggstate->ss.ps.qual
1102          */
1103         while (!aggstate->agg_done)
1104         {
1105                 /*
1106                  * Find the next entry in the hash table
1107                  */
1108                 entry = (AggHashEntry) ScanTupleHashTable(&aggstate->hashiter);
1109                 if (entry == NULL)
1110                 {
1111                         /* No more entries in hashtable, so done */
1112                         aggstate->agg_done = TRUE;
1113                         return NULL;
1114                 }
1115
1116                 /*
1117                  * Clear the per-output-tuple context for each group
1118                  */
1119                 ResetExprContext(econtext);
1120
1121                 /*
1122                  * Store the copied first input tuple in the tuple table slot reserved
1123                  * for it, so that it can be used in ExecProject.
1124                  */
1125                 ExecStoreMinimalTuple(entry->shared.firstTuple,
1126                                                           firstSlot,
1127                                                           false);
1128
1129                 pergroup = entry->pergroup;
1130
1131                 /*
1132                  * Finalize each aggregate calculation, and stash results in the
1133                  * per-output-tuple context.
1134                  */
1135                 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
1136                 {
1137                         AggStatePerAgg peraggstate = &peragg[aggno];
1138                         AggStatePerGroup pergroupstate = &pergroup[aggno];
1139
1140                         Assert(!peraggstate->aggref->aggdistinct);
1141                         finalize_aggregate(aggstate, peraggstate, pergroupstate,
1142                                                            &aggvalues[aggno], &aggnulls[aggno]);
1143                 }
1144
1145                 /*
1146                  * Use the representative input tuple for any references to
1147                  * non-aggregated input columns in the qual and tlist.
1148                  */
1149                 econtext->ecxt_outertuple = firstSlot;
1150
1151                 /*
1152                  * Check the qual (HAVING clause); if the group does not match, ignore
1153                  * it and loop back to try to process another group.
1154                  */
1155                 if (ExecQual(aggstate->ss.ps.qual, econtext, false))
1156                 {
1157                         /*
1158                          * Form and return a projection tuple using the aggregate results
1159                          * and the representative input tuple.
1160                          */
1161                         TupleTableSlot *result;
1162                         ExprDoneCond isDone;
1163
1164                         result = ExecProject(aggstate->ss.ps.ps_ProjInfo, &isDone);
1165
1166                         if (isDone != ExprEndResult)
1167                         {
1168                                 aggstate->ss.ps.ps_TupFromTlist =
1169                                         (isDone == ExprMultipleResult);
1170                                 return result;
1171                         }
1172                 }
1173         }
1174
1175         /* No more groups */
1176         return NULL;
1177 }
1178
1179 /* -----------------
1180  * ExecInitAgg
1181  *
1182  *      Creates the run-time information for the agg node produced by the
1183  *      planner and initializes its outer subtree
1184  * -----------------
1185  */
1186 AggState *
1187 ExecInitAgg(Agg *node, EState *estate, int eflags)
1188 {
1189         AggState   *aggstate;
1190         AggStatePerAgg peragg;
1191         Plan       *outerPlan;
1192         ExprContext *econtext;
1193         int                     numaggs,
1194                                 aggno;
1195         ListCell   *l;
1196
1197         /* check for unsupported flags */
1198         Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
1199
1200         /*
1201          * create state structure
1202          */
1203         aggstate = makeNode(AggState);
1204         aggstate->ss.ps.plan = (Plan *) node;
1205         aggstate->ss.ps.state = estate;
1206
1207         aggstate->aggs = NIL;
1208         aggstate->numaggs = 0;
1209         aggstate->eqfunctions = NULL;
1210         aggstate->hashfunctions = NULL;
1211         aggstate->peragg = NULL;
1212         aggstate->agg_done = false;
1213         aggstate->pergroup = NULL;
1214         aggstate->grp_firstTuple = NULL;
1215         aggstate->hashtable = NULL;
1216
1217         /*
1218          * Create expression contexts.  We need two, one for per-input-tuple
1219          * processing and one for per-output-tuple processing.  We cheat a little
1220          * by using ExecAssignExprContext() to build both.
1221          */
1222         ExecAssignExprContext(estate, &aggstate->ss.ps);
1223         aggstate->tmpcontext = aggstate->ss.ps.ps_ExprContext;
1224         ExecAssignExprContext(estate, &aggstate->ss.ps);
1225
1226         /*
1227          * We also need a long-lived memory context for holding hashtable data
1228          * structures and transition values.  NOTE: the details of what is stored
1229          * in aggcontext and what is stored in the regular per-query memory
1230          * context are driven by a simple decision: we want to reset the
1231          * aggcontext in ExecReScanAgg to recover no-longer-wanted space.
1232          */
1233         aggstate->aggcontext =
1234                 AllocSetContextCreate(CurrentMemoryContext,
1235                                                           "AggContext",
1236                                                           ALLOCSET_DEFAULT_MINSIZE,
1237                                                           ALLOCSET_DEFAULT_INITSIZE,
1238                                                           ALLOCSET_DEFAULT_MAXSIZE);
1239
1240 #define AGG_NSLOTS 3
1241
1242         /*
1243          * tuple table initialization
1244          */
1245         ExecInitScanTupleSlot(estate, &aggstate->ss);
1246         ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
1247         aggstate->hashslot = ExecInitExtraTupleSlot(estate);
1248
1249         /*
1250          * initialize child expressions
1251          *
1252          * Note: ExecInitExpr finds Aggrefs for us, and also checks that no aggs
1253          * contain other agg calls in their arguments.  This would make no sense
1254          * under SQL semantics anyway (and it's forbidden by the spec). Because
1255          * that is true, we don't need to worry about evaluating the aggs in any
1256          * particular order.
1257          */
1258         aggstate->ss.ps.targetlist = (List *)
1259                 ExecInitExpr((Expr *) node->plan.targetlist,
1260                                          (PlanState *) aggstate);
1261         aggstate->ss.ps.qual = (List *)
1262                 ExecInitExpr((Expr *) node->plan.qual,
1263                                          (PlanState *) aggstate);
1264
1265         /*
1266          * initialize child nodes
1267          *
1268          * If we are doing a hashed aggregation then the child plan does not need
1269          * to handle REWIND efficiently; see ExecReScanAgg.
1270          */
1271         if (node->aggstrategy == AGG_HASHED)
1272                 eflags &= ~EXEC_FLAG_REWIND;
1273         outerPlan = outerPlan(node);
1274         outerPlanState(aggstate) = ExecInitNode(outerPlan, estate, eflags);
1275
1276         /*
1277          * initialize source tuple type.
1278          */
1279         ExecAssignScanTypeFromOuterPlan(&aggstate->ss);
1280
1281         /*
1282          * Initialize result tuple type and projection info.
1283          */
1284         ExecAssignResultTypeFromTL(&aggstate->ss.ps);
1285         ExecAssignProjectionInfo(&aggstate->ss.ps, NULL);
1286
1287         aggstate->ss.ps.ps_TupFromTlist = false;
1288
1289         /*
1290          * get the count of aggregates in targetlist and quals
1291          */
1292         numaggs = aggstate->numaggs;
1293         Assert(numaggs == list_length(aggstate->aggs));
1294         if (numaggs <= 0)
1295         {
1296                 /*
1297                  * This is not an error condition: we might be using the Agg node just
1298                  * to do hash-based grouping.  Even in the regular case,
1299                  * constant-expression simplification could optimize away all of the
1300                  * Aggrefs in the targetlist and qual.  So keep going, but force local
1301                  * copy of numaggs positive so that palloc()s below don't choke.
1302                  */
1303                 numaggs = 1;
1304         }
1305
1306         /*
1307          * If we are grouping, precompute fmgr lookup data for inner loop. We need
1308          * both equality and hashing functions to do it by hashing, but only
1309          * equality if not hashing.
1310          */
1311         if (node->numCols > 0)
1312         {
1313                 if (node->aggstrategy == AGG_HASHED)
1314                         execTuplesHashPrepare(node->numCols,
1315                                                                   node->grpOperators,
1316                                                                   &aggstate->eqfunctions,
1317                                                                   &aggstate->hashfunctions);
1318                 else
1319                         aggstate->eqfunctions =
1320                                 execTuplesMatchPrepare(node->numCols,
1321                                                                            node->grpOperators);
1322         }
1323
1324         /*
1325          * Set up aggregate-result storage in the output expr context, and also
1326          * allocate my private per-agg working storage
1327          */
1328         econtext = aggstate->ss.ps.ps_ExprContext;
1329         econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
1330         econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
1331
1332         peragg = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
1333         aggstate->peragg = peragg;
1334
1335         if (node->aggstrategy == AGG_HASHED)
1336         {
1337                 build_hash_table(aggstate);
1338                 aggstate->table_filled = false;
1339                 /* Compute the columns we actually need to hash on */
1340                 aggstate->hash_needed = find_hash_columns(aggstate);
1341         }
1342         else
1343         {
1344                 AggStatePerGroup pergroup;
1345
1346                 pergroup = (AggStatePerGroup) palloc0(sizeof(AggStatePerGroupData) * numaggs);
1347                 aggstate->pergroup = pergroup;
1348         }
1349
1350         /*
1351          * Perform lookups of aggregate function info, and initialize the
1352          * unchanging fields of the per-agg data.  We also detect duplicate
1353          * aggregates (for example, "SELECT sum(x) ... HAVING sum(x) > 0"). When
1354          * duplicates are detected, we only make an AggStatePerAgg struct for the
1355          * first one.  The clones are simply pointed at the same result entry by
1356          * giving them duplicate aggno values.
1357          */
1358         aggno = -1;
1359         foreach(l, aggstate->aggs)
1360         {
1361                 AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(l);
1362                 Aggref     *aggref = (Aggref *) aggrefstate->xprstate.expr;
1363                 AggStatePerAgg peraggstate;
1364                 Oid                     inputTypes[FUNC_MAX_ARGS];
1365                 int                     numArguments;
1366                 HeapTuple       aggTuple;
1367                 Form_pg_aggregate aggform;
1368                 Oid                     aggtranstype;
1369                 AclResult       aclresult;
1370                 Oid                     transfn_oid,
1371                                         finalfn_oid;
1372                 Expr       *transfnexpr,
1373                                    *finalfnexpr;
1374                 Datum           textInitVal;
1375                 int                     i;
1376                 ListCell   *lc;
1377
1378                 /* Planner should have assigned aggregate to correct level */
1379                 Assert(aggref->agglevelsup == 0);
1380
1381                 /* Look for a previous duplicate aggregate */
1382                 for (i = 0; i <= aggno; i++)
1383                 {
1384                         if (equal(aggref, peragg[i].aggref) &&
1385                                 !contain_volatile_functions((Node *) aggref))
1386                                 break;
1387                 }
1388                 if (i <= aggno)
1389                 {
1390                         /* Found a match to an existing entry, so just mark it */
1391                         aggrefstate->aggno = i;
1392                         continue;
1393                 }
1394
1395                 /* Nope, so assign a new PerAgg record */
1396                 peraggstate = &peragg[++aggno];
1397
1398                 /* Mark Aggref state node with assigned index in the result array */
1399                 aggrefstate->aggno = aggno;
1400
1401                 /* Fill in the peraggstate data */
1402                 peraggstate->aggrefstate = aggrefstate;
1403                 peraggstate->aggref = aggref;
1404                 numArguments = list_length(aggref->args);
1405                 peraggstate->numArguments = numArguments;
1406
1407                 /*
1408                  * Get actual datatypes of the inputs.  These could be different from
1409                  * the agg's declared input types, when the agg accepts ANY or a
1410                  * polymorphic type.
1411                  */
1412                 i = 0;
1413                 foreach(lc, aggref->args)
1414                 {
1415                         inputTypes[i++] = exprType((Node *) lfirst(lc));
1416                 }
1417
1418                 aggTuple = SearchSysCache(AGGFNOID,
1419                                                                   ObjectIdGetDatum(aggref->aggfnoid),
1420                                                                   0, 0, 0);
1421                 if (!HeapTupleIsValid(aggTuple))
1422                         elog(ERROR, "cache lookup failed for aggregate %u",
1423                                  aggref->aggfnoid);
1424                 aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
1425
1426                 /* Check permission to call aggregate function */
1427                 aclresult = pg_proc_aclcheck(aggref->aggfnoid, GetUserId(),
1428                                                                          ACL_EXECUTE);
1429                 if (aclresult != ACLCHECK_OK)
1430                         aclcheck_error(aclresult, ACL_KIND_PROC,
1431                                                    get_func_name(aggref->aggfnoid));
1432
1433                 peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
1434                 peraggstate->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
1435
1436                 /* Check that aggregate owner has permission to call component fns */
1437                 {
1438                         HeapTuple       procTuple;
1439                         Oid                     aggOwner;
1440
1441                         procTuple = SearchSysCache(PROCOID,
1442                                                                            ObjectIdGetDatum(aggref->aggfnoid),
1443                                                                            0, 0, 0);
1444                         if (!HeapTupleIsValid(procTuple))
1445                                 elog(ERROR, "cache lookup failed for function %u",
1446                                          aggref->aggfnoid);
1447                         aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner;
1448                         ReleaseSysCache(procTuple);
1449
1450                         aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
1451                                                                                  ACL_EXECUTE);
1452                         if (aclresult != ACLCHECK_OK)
1453                                 aclcheck_error(aclresult, ACL_KIND_PROC,
1454                                                            get_func_name(transfn_oid));
1455                         if (OidIsValid(finalfn_oid))
1456                         {
1457                                 aclresult = pg_proc_aclcheck(finalfn_oid, aggOwner,
1458                                                                                          ACL_EXECUTE);
1459                                 if (aclresult != ACLCHECK_OK)
1460                                         aclcheck_error(aclresult, ACL_KIND_PROC,
1461                                                                    get_func_name(finalfn_oid));
1462                         }
1463                 }
1464
1465                 /* resolve actual type of transition state, if polymorphic */
1466                 aggtranstype = aggform->aggtranstype;
1467                 if (IsPolymorphicType(aggtranstype))
1468                 {
1469                         /* have to fetch the agg's declared input types... */
1470                         Oid                *declaredArgTypes;
1471                         int                     agg_nargs;
1472
1473                         (void) get_func_signature(aggref->aggfnoid,
1474                                                                           &declaredArgTypes, &agg_nargs);
1475                         Assert(agg_nargs == numArguments);
1476                         aggtranstype = enforce_generic_type_consistency(inputTypes,
1477                                                                                                                         declaredArgTypes,
1478                                                                                                                         agg_nargs,
1479                                                                                                                         aggtranstype,
1480                                                                                                                         false);
1481                         pfree(declaredArgTypes);
1482                 }
1483
1484                 /* build expression trees using actual argument & result types */
1485                 build_aggregate_fnexprs(inputTypes,
1486                                                                 numArguments,
1487                                                                 aggtranstype,
1488                                                                 aggref->aggtype,
1489                                                                 transfn_oid,
1490                                                                 finalfn_oid,
1491                                                                 &transfnexpr,
1492                                                                 &finalfnexpr);
1493
1494                 fmgr_info(transfn_oid, &peraggstate->transfn);
1495                 peraggstate->transfn.fn_expr = (Node *) transfnexpr;
1496
1497                 if (OidIsValid(finalfn_oid))
1498                 {
1499                         fmgr_info(finalfn_oid, &peraggstate->finalfn);
1500                         peraggstate->finalfn.fn_expr = (Node *) finalfnexpr;
1501                 }
1502
1503                 get_typlenbyval(aggref->aggtype,
1504                                                 &peraggstate->resulttypeLen,
1505                                                 &peraggstate->resulttypeByVal);
1506                 get_typlenbyval(aggtranstype,
1507                                                 &peraggstate->transtypeLen,
1508                                                 &peraggstate->transtypeByVal);
1509
1510                 /*
1511                  * initval is potentially null, so don't try to access it as a struct
1512                  * field. Must do it the hard way with SysCacheGetAttr.
1513                  */
1514                 textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
1515                                                                           Anum_pg_aggregate_agginitval,
1516                                                                           &peraggstate->initValueIsNull);
1517
1518                 if (peraggstate->initValueIsNull)
1519                         peraggstate->initValue = (Datum) 0;
1520                 else
1521                         peraggstate->initValue = GetAggInitVal(textInitVal,
1522                                                                                                    aggtranstype);
1523
1524                 /*
1525                  * If the transfn is strict and the initval is NULL, make sure input
1526                  * type and transtype are the same (or at least binary-compatible), so
1527                  * that it's OK to use the first input value as the initial
1528                  * transValue.  This should have been checked at agg definition time,
1529                  * but just in case...
1530                  */
1531                 if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull)
1532                 {
1533                         if (numArguments < 1 ||
1534                                 !IsBinaryCoercible(inputTypes[0], aggtranstype))
1535                                 ereport(ERROR,
1536                                                 (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
1537                                                  errmsg("aggregate %u needs to have compatible input type and transition type",
1538                                                                 aggref->aggfnoid)));
1539                 }
1540
1541                 if (aggref->aggdistinct)
1542                 {
1543                         Oid                     lt_opr;
1544                         Oid                     eq_opr;
1545
1546                         /* We don't implement DISTINCT aggs in the HASHED case */
1547                         Assert(node->aggstrategy != AGG_HASHED);
1548
1549                         /*
1550                          * We don't currently implement DISTINCT aggs for aggs having more
1551                          * than one argument.  This isn't required for anything in the SQL
1552                          * spec, but really it ought to be implemented for
1553                          * feature-completeness.  FIXME someday.
1554                          */
1555                         if (numArguments != 1)
1556                                 ereport(ERROR,
1557                                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1558                                                  errmsg("DISTINCT is supported only for single-argument aggregates")));
1559
1560                         peraggstate->inputType = inputTypes[0];
1561                         get_typlenbyval(inputTypes[0],
1562                                                         &peraggstate->inputtypeLen,
1563                                                         &peraggstate->inputtypeByVal);
1564
1565                         /*
1566                          * Look up the sorting and comparison operators to use.  XXX it's
1567                          * pretty bletcherous to be making this sort of semantic decision
1568                          * in the executor.  Probably the parser should decide this and
1569                          * record it in the Aggref node ... or at latest, do it in the
1570                          * planner.
1571                          */
1572                         get_sort_group_operators(inputTypes[0],
1573                                                                          true, true, false,
1574                                                                          &lt_opr, &eq_opr, NULL);
1575                         fmgr_info(get_opcode(eq_opr), &(peraggstate->equalfn));
1576                         peraggstate->sortOperator = lt_opr;
1577                         peraggstate->sortstate = NULL;
1578                 }
1579
1580                 ReleaseSysCache(aggTuple);
1581         }
1582
1583         /* Update numaggs to match number of unique aggregates found */
1584         aggstate->numaggs = aggno + 1;
1585
1586         return aggstate;
1587 }
1588
1589 static Datum
1590 GetAggInitVal(Datum textInitVal, Oid transtype)
1591 {
1592         Oid                     typinput,
1593                                 typioparam;
1594         char       *strInitVal;
1595         Datum           initVal;
1596
1597         getTypeInputInfo(transtype, &typinput, &typioparam);
1598         strInitVal = TextDatumGetCString(textInitVal);
1599         initVal = OidInputFunctionCall(typinput, strInitVal,
1600                                                                    typioparam, -1);
1601         pfree(strInitVal);
1602         return initVal;
1603 }
1604
1605 int
1606 ExecCountSlotsAgg(Agg *node)
1607 {
1608         return ExecCountSlotsNode(outerPlan(node)) +
1609                 ExecCountSlotsNode(innerPlan(node)) +
1610                 AGG_NSLOTS;
1611 }
1612
1613 void
1614 ExecEndAgg(AggState *node)
1615 {
1616         PlanState  *outerPlan;
1617         int                     aggno;
1618
1619         /* Make sure we have closed any open tuplesorts */
1620         for (aggno = 0; aggno < node->numaggs; aggno++)
1621         {
1622                 AggStatePerAgg peraggstate = &node->peragg[aggno];
1623
1624                 if (peraggstate->sortstate)
1625                         tuplesort_end(peraggstate->sortstate);
1626         }
1627
1628         /*
1629          * Free both the expr contexts.
1630          */
1631         ExecFreeExprContext(&node->ss.ps);
1632         node->ss.ps.ps_ExprContext = node->tmpcontext;
1633         ExecFreeExprContext(&node->ss.ps);
1634
1635         /* clean up tuple table */
1636         ExecClearTuple(node->ss.ss_ScanTupleSlot);
1637
1638         MemoryContextDelete(node->aggcontext);
1639
1640         outerPlan = outerPlanState(node);
1641         ExecEndNode(outerPlan);
1642 }
1643
1644 void
1645 ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
1646 {
1647         ExprContext *econtext = node->ss.ps.ps_ExprContext;
1648         int                     aggno;
1649
1650         node->agg_done = false;
1651
1652         node->ss.ps.ps_TupFromTlist = false;
1653
1654         if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1655         {
1656                 /*
1657                  * In the hashed case, if we haven't yet built the hash table then we
1658                  * can just return; nothing done yet, so nothing to undo. If subnode's
1659                  * chgParam is not NULL then it will be re-scanned by ExecProcNode,
1660                  * else no reason to re-scan it at all.
1661                  */
1662                 if (!node->table_filled)
1663                         return;
1664
1665                 /*
1666                  * If we do have the hash table and the subplan does not have any
1667                  * parameter changes, then we can just rescan the existing hash table;
1668                  * no need to build it again.
1669                  */
1670                 if (((PlanState *) node)->lefttree->chgParam == NULL)
1671                 {
1672                         ResetTupleHashIterator(node->hashtable, &node->hashiter);
1673                         return;
1674                 }
1675         }
1676
1677         /* Make sure we have closed any open tuplesorts */
1678         for (aggno = 0; aggno < node->numaggs; aggno++)
1679         {
1680                 AggStatePerAgg peraggstate = &node->peragg[aggno];
1681
1682                 if (peraggstate->sortstate)
1683                         tuplesort_end(peraggstate->sortstate);
1684                 peraggstate->sortstate = NULL;
1685         }
1686
1687         /* Release first tuple of group, if we have made a copy */
1688         if (node->grp_firstTuple != NULL)
1689         {
1690                 heap_freetuple(node->grp_firstTuple);
1691                 node->grp_firstTuple = NULL;
1692         }
1693
1694         /* Forget current agg values */
1695         MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
1696         MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
1697
1698         /*
1699          * Release all temp storage. Note that with AGG_HASHED, the hash table is
1700          * allocated in a sub-context of the aggcontext. We're going to rebuild
1701          * the hash table from scratch, so we need to use
1702          * MemoryContextResetAndDeleteChildren() to avoid leaking the old hash
1703          * table's memory context header.
1704          */
1705         MemoryContextResetAndDeleteChildren(node->aggcontext);
1706
1707         if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1708         {
1709                 /* Rebuild an empty hash table */
1710                 build_hash_table(node);
1711                 node->table_filled = false;
1712         }
1713         else
1714         {
1715                 /*
1716                  * Reset the per-group state (in particular, mark transvalues null)
1717                  */
1718                 MemSet(node->pergroup, 0,
1719                            sizeof(AggStatePerGroupData) * node->numaggs);
1720         }
1721
1722         /*
1723          * if chgParam of subnode is not null then plan will be re-scanned by
1724          * first ExecProcNode.
1725          */
1726         if (((PlanState *) node)->lefttree->chgParam == NULL)
1727                 ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
1728 }
1729
1730 /*
1731  * aggregate_dummy - dummy execution routine for aggregate functions
1732  *
1733  * This function is listed as the implementation (prosrc field) of pg_proc
1734  * entries for aggregate functions.  Its only purpose is to throw an error
1735  * if someone mistakenly executes such a function in the normal way.
1736  *
1737  * Perhaps someday we could assign real meaning to the prosrc field of
1738  * an aggregate?
1739  */
1740 Datum
1741 aggregate_dummy(PG_FUNCTION_ARGS)
1742 {
1743         elog(ERROR, "aggregate function %u called as normal function",
1744                  fcinfo->flinfo->fn_oid);
1745         return (Datum) 0;                       /* keep compiler quiet */
1746 }