Fix oversight in previous error-reporting patch; mustn't pfree path string
[PostgreSQL.git] / src / backend / parser / parse_clause.c
blob58cef1cfc95dfeee559deeba9693849b67715efd
/*-------------------------------------------------------------------------
 *
 * parse_clause.c
 *	  handle clauses in parser
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
16 #include "postgres.h"
18 #include "access/heapam.h"
19 #include "catalog/heap.h"
20 #include "catalog/pg_type.h"
21 #include "commands/defrem.h"
22 #include "nodes/makefuncs.h"
23 #include "nodes/nodeFuncs.h"
24 #include "optimizer/tlist.h"
25 #include "optimizer/var.h"
26 #include "parser/analyze.h"
27 #include "parser/parsetree.h"
28 #include "parser/parse_clause.h"
29 #include "parser/parse_coerce.h"
30 #include "parser/parse_expr.h"
31 #include "parser/parse_oper.h"
32 #include "parser/parse_relation.h"
33 #include "parser/parse_target.h"
34 #include "rewrite/rewriteManip.h"
35 #include "utils/guc.h"
36 #include "utils/lsyscache.h"
37 #include "utils/rel.h"
/*
 * Codes naming the clause being processed (see the "clause" argument of
 * findTargetlistEntry).
 */
#define ORDER_CLAUSE		0
#define GROUP_CLAUSE		1
#define DISTINCT_ON_CLAUSE	2

/* SQL spellings of the clauses, listed in the same order as the codes */
static char *clauseText[] = {
	"ORDER BY",
	"GROUP BY",
	"DISTINCT ON"
};
46 static void extractRemainingColumns(List *common_colnames,
47 List *src_colnames, List *src_colvars,
48 List **res_colnames, List **res_colvars);
49 static Node *transformJoinUsingClause(ParseState *pstate,
50 List *leftVars, List *rightVars);
51 static Node *transformJoinOnClause(ParseState *pstate, JoinExpr *j,
52 RangeTblEntry *l_rte,
53 RangeTblEntry *r_rte,
54 List *relnamespace,
55 Relids containedRels);
56 static RangeTblEntry *transformTableEntry(ParseState *pstate, RangeVar *r);
57 static RangeTblEntry *transformCTEReference(ParseState *pstate, RangeVar *r,
58 CommonTableExpr *cte, Index levelsup);
59 static RangeTblEntry *transformRangeSubselect(ParseState *pstate,
60 RangeSubselect *r);
61 static RangeTblEntry *transformRangeFunction(ParseState *pstate,
62 RangeFunction *r);
63 static Node *transformFromClauseItem(ParseState *pstate, Node *n,
64 RangeTblEntry **top_rte, int *top_rti,
65 List **relnamespace,
66 Relids *containedRels);
67 static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
68 Var *l_colvar, Var *r_colvar);
69 static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node,
70 List **tlist, int clause);
71 static int get_matching_location(int sortgroupref,
72 List *sortgrouprefs, List *exprs);
73 static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
74 List *sortlist, List *targetlist, SortBy *sortby,
75 bool resolveUnknown);
76 static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
77 List *grouplist, List *targetlist, int location,
78 bool resolveUnknown);
82 * transformFromClause -
83 * Process the FROM clause and add items to the query's range table,
84 * joinlist, and namespaces.
86 * Note: we assume that pstate's p_rtable, p_joinlist, p_relnamespace, and
87 * p_varnamespace lists were initialized to NIL when the pstate was created.
88 * We will add onto any entries already present --- this is needed for rule
89 * processing, as well as for UPDATE and DELETE.
91 * The range table may grow still further when we transform the expressions
92 * in the query's quals and target list. (This is possible because in
93 * POSTQUEL, we allowed references to relations not specified in the
94 * from-clause. PostgreSQL keeps this extension to standard SQL.)
96 void
97 transformFromClause(ParseState *pstate, List *frmList)
99 ListCell *fl;
102 * The grammar will have produced a list of RangeVars, RangeSubselects,
103 * RangeFunctions, and/or JoinExprs. Transform each one (possibly adding
104 * entries to the rtable), check for duplicate refnames, and then add it
105 * to the joinlist and namespaces.
107 foreach(fl, frmList)
109 Node *n = lfirst(fl);
110 RangeTblEntry *rte;
111 int rtindex;
112 List *relnamespace;
113 Relids containedRels;
115 n = transformFromClauseItem(pstate, n,
116 &rte,
117 &rtindex,
118 &relnamespace,
119 &containedRels);
120 checkNameSpaceConflicts(pstate, pstate->p_relnamespace, relnamespace);
121 pstate->p_joinlist = lappend(pstate->p_joinlist, n);
122 pstate->p_relnamespace = list_concat(pstate->p_relnamespace,
123 relnamespace);
124 pstate->p_varnamespace = lappend(pstate->p_varnamespace, rte);
125 bms_free(containedRels);
130 * setTargetTable
131 * Add the target relation of INSERT/UPDATE/DELETE to the range table,
132 * and make the special links to it in the ParseState.
134 * We also open the target relation and acquire a write lock on it.
135 * This must be done before processing the FROM list, in case the target
136 * is also mentioned as a source relation --- we want to be sure to grab
137 * the write lock before any read lock.
139 * If alsoSource is true, add the target to the query's joinlist and
140 * namespace. For INSERT, we don't want the target to be joined to;
141 * it's a destination of tuples, not a source. For UPDATE/DELETE,
142 * we do need to scan or join the target. (NOTE: we do not bother
143 * to check for namespace conflict; we assume that the namespace was
144 * initially empty in these cases.)
146 * Finally, we mark the relation as requiring the permissions specified
147 * by requiredPerms.
149 * Returns the rangetable index of the target relation.
152 setTargetTable(ParseState *pstate, RangeVar *relation,
153 bool inh, bool alsoSource, AclMode requiredPerms)
155 RangeTblEntry *rte;
156 int rtindex;
158 /* Close old target; this could only happen for multi-action rules */
159 if (pstate->p_target_relation != NULL)
160 heap_close(pstate->p_target_relation, NoLock);
163 * Open target rel and grab suitable lock (which we will hold till end of
164 * transaction).
166 * free_parsestate() will eventually do the corresponding heap_close(),
167 * but *not* release the lock.
169 pstate->p_target_relation = parserOpenTable(pstate, relation,
170 RowExclusiveLock);
173 * Now build an RTE.
175 rte = addRangeTableEntryForRelation(pstate, pstate->p_target_relation,
176 relation->alias, inh, false);
177 pstate->p_target_rangetblentry = rte;
179 /* assume new rte is at end */
180 rtindex = list_length(pstate->p_rtable);
181 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
184 * Override addRangeTableEntry's default ACL_SELECT permissions check, and
185 * instead mark target table as requiring exactly the specified
186 * permissions.
188 * If we find an explicit reference to the rel later during parse
189 * analysis, we will add the ACL_SELECT bit back again; see
190 * scanRTEForColumn (for simple field references), ExpandColumnRefStar
191 * (for foo.*) and ExpandAllTables (for *).
193 rte->requiredPerms = requiredPerms;
196 * If UPDATE/DELETE, add table to joinlist and namespaces.
198 if (alsoSource)
199 addRTEtoQuery(pstate, rte, true, true, true);
201 return rtindex;
205 * Simplify InhOption (yes/no/default) into boolean yes/no.
207 * The reason we do things this way is that we don't want to examine the
208 * SQL_inheritance option flag until parse_analyze() is run. Otherwise,
209 * we'd do the wrong thing with query strings that intermix SET commands
210 * with queries.
212 bool
213 interpretInhOption(InhOption inhOpt)
215 switch (inhOpt)
217 case INH_NO:
218 return false;
219 case INH_YES:
220 return true;
221 case INH_DEFAULT:
222 return SQL_inheritance;
224 elog(ERROR, "bogus InhOption value: %d", inhOpt);
225 return false; /* keep compiler quiet */
229 * Given a relation-options list (of DefElems), return true iff the specified
230 * table/result set should be created with OIDs. This needs to be done after
231 * parsing the query string because the return value can depend upon the
232 * default_with_oids GUC var.
234 bool
235 interpretOidsOption(List *defList)
237 ListCell *cell;
239 /* Scan list to see if OIDS was included */
240 foreach(cell, defList)
242 DefElem *def = (DefElem *) lfirst(cell);
244 if (pg_strcasecmp(def->defname, "oids") == 0)
245 return defGetBoolean(def);
248 /* OIDS option was not specified, so use default. */
249 return default_with_oids;
253 * Extract all not-in-common columns from column lists of a source table
255 static void
256 extractRemainingColumns(List *common_colnames,
257 List *src_colnames, List *src_colvars,
258 List **res_colnames, List **res_colvars)
260 List *new_colnames = NIL;
261 List *new_colvars = NIL;
262 ListCell *lnames,
263 *lvars;
265 Assert(list_length(src_colnames) == list_length(src_colvars));
267 forboth(lnames, src_colnames, lvars, src_colvars)
269 char *colname = strVal(lfirst(lnames));
270 bool match = false;
271 ListCell *cnames;
273 foreach(cnames, common_colnames)
275 char *ccolname = strVal(lfirst(cnames));
277 if (strcmp(colname, ccolname) == 0)
279 match = true;
280 break;
284 if (!match)
286 new_colnames = lappend(new_colnames, lfirst(lnames));
287 new_colvars = lappend(new_colvars, lfirst(lvars));
291 *res_colnames = new_colnames;
292 *res_colvars = new_colvars;
295 /* transformJoinUsingClause()
296 * Build a complete ON clause from a partially-transformed USING list.
297 * We are given lists of nodes representing left and right match columns.
298 * Result is a transformed qualification expression.
300 static Node *
301 transformJoinUsingClause(ParseState *pstate, List *leftVars, List *rightVars)
303 Node *result = NULL;
304 ListCell *lvars,
305 *rvars;
308 * We cheat a little bit here by building an untransformed operator tree
309 * whose leaves are the already-transformed Vars. This is OK because
310 * transformExpr() won't complain about already-transformed subnodes.
312 forboth(lvars, leftVars, rvars, rightVars)
314 Node *lvar = (Node *) lfirst(lvars);
315 Node *rvar = (Node *) lfirst(rvars);
316 A_Expr *e;
318 e = makeSimpleA_Expr(AEXPR_OP, "=",
319 copyObject(lvar), copyObject(rvar),
320 -1);
322 if (result == NULL)
323 result = (Node *) e;
324 else
326 A_Expr *a;
328 a = makeA_Expr(AEXPR_AND, NIL, result, (Node *) e, -1);
329 result = (Node *) a;
334 * Since the references are already Vars, and are certainly from the input
335 * relations, we don't have to go through the same pushups that
336 * transformJoinOnClause() does. Just invoke transformExpr() to fix up
337 * the operators, and we're done.
339 result = transformExpr(pstate, result);
341 result = coerce_to_boolean(pstate, result, "JOIN/USING");
343 return result;
346 /* transformJoinOnClause()
347 * Transform the qual conditions for JOIN/ON.
348 * Result is a transformed qualification expression.
350 static Node *
351 transformJoinOnClause(ParseState *pstate, JoinExpr *j,
352 RangeTblEntry *l_rte,
353 RangeTblEntry *r_rte,
354 List *relnamespace,
355 Relids containedRels)
357 Node *result;
358 List *save_relnamespace;
359 List *save_varnamespace;
360 Relids clause_varnos;
361 int varno;
364 * This is a tad tricky, for two reasons. First, the namespace that the
365 * join expression should see is just the two subtrees of the JOIN plus
366 * any outer references from upper pstate levels. So, temporarily set
367 * this pstate's namespace accordingly. (We need not check for refname
368 * conflicts, because transformFromClauseItem() already did.) NOTE: this
369 * code is OK only because the ON clause can't legally alter the namespace
370 * by causing implicit relation refs to be added.
372 save_relnamespace = pstate->p_relnamespace;
373 save_varnamespace = pstate->p_varnamespace;
375 pstate->p_relnamespace = relnamespace;
376 pstate->p_varnamespace = list_make2(l_rte, r_rte);
378 result = transformWhereClause(pstate, j->quals, "JOIN/ON");
380 pstate->p_relnamespace = save_relnamespace;
381 pstate->p_varnamespace = save_varnamespace;
384 * Second, we need to check that the ON condition doesn't refer to any
385 * rels outside the input subtrees of the JOIN. It could do that despite
386 * our hack on the namespace if it uses fully-qualified names. So, grovel
387 * through the transformed clause and make sure there are no bogus
388 * references. (Outer references are OK, and are ignored here.)
390 clause_varnos = pull_varnos(result);
391 clause_varnos = bms_del_members(clause_varnos, containedRels);
392 if ((varno = bms_first_member(clause_varnos)) >= 0)
394 ereport(ERROR,
395 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
396 errmsg("JOIN/ON clause refers to \"%s\", which is not part of JOIN",
397 rt_fetch(varno, pstate->p_rtable)->eref->aliasname),
398 parser_errposition(pstate,
399 locate_var_of_relation(result, varno, 0))));
401 bms_free(clause_varnos);
403 return result;
407 * transformTableEntry --- transform a RangeVar (simple relation reference)
409 static RangeTblEntry *
410 transformTableEntry(ParseState *pstate, RangeVar *r)
412 RangeTblEntry *rte;
415 * mark this entry to indicate it comes from the FROM clause. In SQL, the
416 * target list can only refer to range variables specified in the from
417 * clause but we follow the more powerful POSTQUEL semantics and
418 * automatically generate the range variable if not specified. However
419 * there are times we need to know whether the entries are legitimate.
421 rte = addRangeTableEntry(pstate, r, r->alias,
422 interpretInhOption(r->inhOpt), true);
424 return rte;
428 * transformCTEReference --- transform a RangeVar that references a common
429 * table expression (ie, a sub-SELECT defined in a WITH clause)
431 static RangeTblEntry *
432 transformCTEReference(ParseState *pstate, RangeVar *r,
433 CommonTableExpr *cte, Index levelsup)
435 RangeTblEntry *rte;
437 rte = addRangeTableEntryForCTE(pstate, cte, levelsup, r->alias, true);
439 return rte;
443 * transformRangeSubselect --- transform a sub-SELECT appearing in FROM
445 static RangeTblEntry *
446 transformRangeSubselect(ParseState *pstate, RangeSubselect *r)
448 Query *query;
449 RangeTblEntry *rte;
452 * We require user to supply an alias for a subselect, per SQL92. To relax
453 * this, we'd have to be prepared to gin up a unique alias for an
454 * unlabeled subselect. (This is just elog, not ereport, because the
455 * grammar should have enforced it already.)
457 if (r->alias == NULL)
458 elog(ERROR, "subquery in FROM must have an alias");
461 * Analyze and transform the subquery.
463 query = parse_sub_analyze(r->subquery, pstate);
466 * Check that we got something reasonable. Many of these conditions are
467 * impossible given restrictions of the grammar, but check 'em anyway.
469 if (!IsA(query, Query) ||
470 query->commandType != CMD_SELECT ||
471 query->utilityStmt != NULL)
472 elog(ERROR, "unexpected non-SELECT command in subquery in FROM");
473 if (query->intoClause)
474 ereport(ERROR,
475 (errcode(ERRCODE_SYNTAX_ERROR),
476 errmsg("subquery in FROM cannot have SELECT INTO"),
477 parser_errposition(pstate,
478 exprLocation((Node *) query->intoClause))));
481 * The subquery cannot make use of any variables from FROM items created
482 * earlier in the current query. Per SQL92, the scope of a FROM item does
483 * not include other FROM items. Formerly we hacked the namespace so that
484 * the other variables weren't even visible, but it seems more useful to
485 * leave them visible and give a specific error message.
487 * XXX this will need further work to support SQL99's LATERAL() feature,
488 * wherein such references would indeed be legal.
490 * We can skip groveling through the subquery if there's not anything
491 * visible in the current query. Also note that outer references are OK.
493 if (pstate->p_relnamespace || pstate->p_varnamespace)
495 if (contain_vars_of_level((Node *) query, 1))
496 ereport(ERROR,
497 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
498 errmsg("subquery in FROM cannot refer to other relations of same query level"),
499 parser_errposition(pstate,
500 locate_var_of_level((Node *) query, 1))));
504 * OK, build an RTE for the subquery.
506 rte = addRangeTableEntryForSubquery(pstate, query, r->alias, true);
508 return rte;
513 * transformRangeFunction --- transform a function call appearing in FROM
515 static RangeTblEntry *
516 transformRangeFunction(ParseState *pstate, RangeFunction *r)
518 Node *funcexpr;
519 char *funcname;
520 RangeTblEntry *rte;
523 * Get function name for possible use as alias. We use the same
524 * transformation rules as for a SELECT output expression. For a FuncCall
525 * node, the result will be the function name, but it is possible for the
526 * grammar to hand back other node types.
528 funcname = FigureColname(r->funccallnode);
531 * Transform the raw expression.
533 funcexpr = transformExpr(pstate, r->funccallnode);
536 * The function parameters cannot make use of any variables from other
537 * FROM items. (Compare to transformRangeSubselect(); the coding is
538 * different though because we didn't parse as a sub-select with its own
539 * level of namespace.)
541 * XXX this will need further work to support SQL99's LATERAL() feature,
542 * wherein such references would indeed be legal.
544 if (pstate->p_relnamespace || pstate->p_varnamespace)
546 if (contain_vars_of_level(funcexpr, 0))
547 ereport(ERROR,
548 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
549 errmsg("function expression in FROM cannot refer to other relations of same query level"),
550 parser_errposition(pstate,
551 locate_var_of_level(funcexpr, 0))));
555 * Disallow aggregate functions in the expression. (No reason to postpone
556 * this check until parseCheckAggregates.)
558 if (pstate->p_hasAggs)
560 if (checkExprHasAggs(funcexpr))
561 ereport(ERROR,
562 (errcode(ERRCODE_GROUPING_ERROR),
563 errmsg("cannot use aggregate function in function expression in FROM"),
564 parser_errposition(pstate,
565 locate_agg_of_level(funcexpr, 0))));
569 * OK, build an RTE for the function.
571 rte = addRangeTableEntryForFunction(pstate, funcname, funcexpr,
572 r, true);
575 * If a coldeflist was supplied, ensure it defines a legal set of names
576 * (no duplicates) and datatypes (no pseudo-types, for instance).
577 * addRangeTableEntryForFunction looked up the type names but didn't check
578 * them further than that.
580 if (r->coldeflist)
582 TupleDesc tupdesc;
584 tupdesc = BuildDescFromLists(rte->eref->colnames,
585 rte->funccoltypes,
586 rte->funccoltypmods);
587 CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE);
590 return rte;
595 * transformFromClauseItem -
596 * Transform a FROM-clause item, adding any required entries to the
597 * range table list being built in the ParseState, and return the
598 * transformed item ready to include in the joinlist and namespaces.
599 * This routine can recurse to handle SQL92 JOIN expressions.
601 * The function return value is the node to add to the jointree (a
602 * RangeTblRef or JoinExpr). Additional output parameters are:
604 * *top_rte: receives the RTE corresponding to the jointree item.
605 * (We could extract this from the function return node, but it saves cycles
606 * to pass it back separately.)
608 * *top_rti: receives the rangetable index of top_rte. (Ditto.)
610 * *relnamespace: receives a List of the RTEs exposed as relation names
611 * by this item.
613 * *containedRels: receives a bitmap set of the rangetable indexes
614 * of all the base and join relations represented in this jointree item.
615 * This is needed for checking JOIN/ON conditions in higher levels.
617 * We do not need to pass back an explicit varnamespace value, because
618 * in all cases the varnamespace contribution is exactly top_rte.
620 static Node *
621 transformFromClauseItem(ParseState *pstate, Node *n,
622 RangeTblEntry **top_rte, int *top_rti,
623 List **relnamespace,
624 Relids *containedRels)
626 if (IsA(n, RangeVar))
628 /* Plain relation reference, or perhaps a CTE reference */
629 RangeVar *rv = (RangeVar *) n;
630 RangeTblRef *rtr;
631 RangeTblEntry *rte = NULL;
632 int rtindex;
634 /* if it is an unqualified name, it might be a CTE reference */
635 if (!rv->schemaname)
637 CommonTableExpr *cte;
638 Index levelsup;
640 cte = scanNameSpaceForCTE(pstate, rv->relname, &levelsup);
641 if (cte)
642 rte = transformCTEReference(pstate, rv, cte, levelsup);
645 /* if not found as a CTE, must be a table reference */
646 if (!rte)
647 rte = transformTableEntry(pstate, rv);
649 /* assume new rte is at end */
650 rtindex = list_length(pstate->p_rtable);
651 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
652 *top_rte = rte;
653 *top_rti = rtindex;
654 *relnamespace = list_make1(rte);
655 *containedRels = bms_make_singleton(rtindex);
656 rtr = makeNode(RangeTblRef);
657 rtr->rtindex = rtindex;
658 return (Node *) rtr;
660 else if (IsA(n, RangeSubselect))
662 /* sub-SELECT is like a plain relation */
663 RangeTblRef *rtr;
664 RangeTblEntry *rte;
665 int rtindex;
667 rte = transformRangeSubselect(pstate, (RangeSubselect *) n);
668 /* assume new rte is at end */
669 rtindex = list_length(pstate->p_rtable);
670 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
671 *top_rte = rte;
672 *top_rti = rtindex;
673 *relnamespace = list_make1(rte);
674 *containedRels = bms_make_singleton(rtindex);
675 rtr = makeNode(RangeTblRef);
676 rtr->rtindex = rtindex;
677 return (Node *) rtr;
679 else if (IsA(n, RangeFunction))
681 /* function is like a plain relation */
682 RangeTblRef *rtr;
683 RangeTblEntry *rte;
684 int rtindex;
686 rte = transformRangeFunction(pstate, (RangeFunction *) n);
687 /* assume new rte is at end */
688 rtindex = list_length(pstate->p_rtable);
689 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
690 *top_rte = rte;
691 *top_rti = rtindex;
692 *relnamespace = list_make1(rte);
693 *containedRels = bms_make_singleton(rtindex);
694 rtr = makeNode(RangeTblRef);
695 rtr->rtindex = rtindex;
696 return (Node *) rtr;
698 else if (IsA(n, JoinExpr))
700 /* A newfangled join expression */
701 JoinExpr *j = (JoinExpr *) n;
702 RangeTblEntry *l_rte;
703 RangeTblEntry *r_rte;
704 int l_rtindex;
705 int r_rtindex;
706 Relids l_containedRels,
707 r_containedRels,
708 my_containedRels;
709 List *l_relnamespace,
710 *r_relnamespace,
711 *my_relnamespace,
712 *l_colnames,
713 *r_colnames,
714 *res_colnames,
715 *l_colvars,
716 *r_colvars,
717 *res_colvars;
718 RangeTblEntry *rte;
721 * Recursively process the left and right subtrees
723 j->larg = transformFromClauseItem(pstate, j->larg,
724 &l_rte,
725 &l_rtindex,
726 &l_relnamespace,
727 &l_containedRels);
728 j->rarg = transformFromClauseItem(pstate, j->rarg,
729 &r_rte,
730 &r_rtindex,
731 &r_relnamespace,
732 &r_containedRels);
735 * Check for conflicting refnames in left and right subtrees. Must do
736 * this because higher levels will assume I hand back a self-
737 * consistent namespace subtree.
739 checkNameSpaceConflicts(pstate, l_relnamespace, r_relnamespace);
742 * Generate combined relation membership info for possible use by
743 * transformJoinOnClause below.
745 my_relnamespace = list_concat(l_relnamespace, r_relnamespace);
746 my_containedRels = bms_join(l_containedRels, r_containedRels);
748 pfree(r_relnamespace); /* free unneeded list header */
751 * Extract column name and var lists from both subtrees
753 * Note: expandRTE returns new lists, safe for me to modify
755 expandRTE(l_rte, l_rtindex, 0, -1, false,
756 &l_colnames, &l_colvars);
757 expandRTE(r_rte, r_rtindex, 0, -1, false,
758 &r_colnames, &r_colvars);
761 * Natural join does not explicitly specify columns; must generate
762 * columns to join. Need to run through the list of columns from each
763 * table or join result and match up the column names. Use the first
764 * table, and check every column in the second table for a match.
765 * (We'll check that the matches were unique later on.) The result of
766 * this step is a list of column names just like an explicitly-written
767 * USING list.
769 if (j->isNatural)
771 List *rlist = NIL;
772 ListCell *lx,
773 *rx;
775 Assert(j->using == NIL); /* shouldn't have USING() too */
777 foreach(lx, l_colnames)
779 char *l_colname = strVal(lfirst(lx));
780 Value *m_name = NULL;
782 foreach(rx, r_colnames)
784 char *r_colname = strVal(lfirst(rx));
786 if (strcmp(l_colname, r_colname) == 0)
788 m_name = makeString(l_colname);
789 break;
793 /* matched a right column? then keep as join column... */
794 if (m_name != NULL)
795 rlist = lappend(rlist, m_name);
798 j->using = rlist;
802 * Now transform the join qualifications, if any.
804 res_colnames = NIL;
805 res_colvars = NIL;
807 if (j->using)
810 * JOIN/USING (or NATURAL JOIN, as transformed above). Transform
811 * the list into an explicit ON-condition, and generate a list of
812 * merged result columns.
814 List *ucols = j->using;
815 List *l_usingvars = NIL;
816 List *r_usingvars = NIL;
817 ListCell *ucol;
819 Assert(j->quals == NULL); /* shouldn't have ON() too */
821 foreach(ucol, ucols)
823 char *u_colname = strVal(lfirst(ucol));
824 ListCell *col;
825 int ndx;
826 int l_index = -1;
827 int r_index = -1;
828 Var *l_colvar,
829 *r_colvar;
831 /* Check for USING(foo,foo) */
832 foreach(col, res_colnames)
834 char *res_colname = strVal(lfirst(col));
836 if (strcmp(res_colname, u_colname) == 0)
837 ereport(ERROR,
838 (errcode(ERRCODE_DUPLICATE_COLUMN),
839 errmsg("column name \"%s\" appears more than once in USING clause",
840 u_colname)));
843 /* Find it in left input */
844 ndx = 0;
845 foreach(col, l_colnames)
847 char *l_colname = strVal(lfirst(col));
849 if (strcmp(l_colname, u_colname) == 0)
851 if (l_index >= 0)
852 ereport(ERROR,
853 (errcode(ERRCODE_AMBIGUOUS_COLUMN),
854 errmsg("common column name \"%s\" appears more than once in left table",
855 u_colname)));
856 l_index = ndx;
858 ndx++;
860 if (l_index < 0)
861 ereport(ERROR,
862 (errcode(ERRCODE_UNDEFINED_COLUMN),
863 errmsg("column \"%s\" specified in USING clause does not exist in left table",
864 u_colname)));
866 /* Find it in right input */
867 ndx = 0;
868 foreach(col, r_colnames)
870 char *r_colname = strVal(lfirst(col));
872 if (strcmp(r_colname, u_colname) == 0)
874 if (r_index >= 0)
875 ereport(ERROR,
876 (errcode(ERRCODE_AMBIGUOUS_COLUMN),
877 errmsg("common column name \"%s\" appears more than once in right table",
878 u_colname)));
879 r_index = ndx;
881 ndx++;
883 if (r_index < 0)
884 ereport(ERROR,
885 (errcode(ERRCODE_UNDEFINED_COLUMN),
886 errmsg("column \"%s\" specified in USING clause does not exist in right table",
887 u_colname)));
889 l_colvar = list_nth(l_colvars, l_index);
890 l_usingvars = lappend(l_usingvars, l_colvar);
891 r_colvar = list_nth(r_colvars, r_index);
892 r_usingvars = lappend(r_usingvars, r_colvar);
894 res_colnames = lappend(res_colnames, lfirst(ucol));
895 res_colvars = lappend(res_colvars,
896 buildMergedJoinVar(pstate,
897 j->jointype,
898 l_colvar,
899 r_colvar));
902 j->quals = transformJoinUsingClause(pstate,
903 l_usingvars,
904 r_usingvars);
906 else if (j->quals)
908 /* User-written ON-condition; transform it */
909 j->quals = transformJoinOnClause(pstate, j,
910 l_rte, r_rte,
911 my_relnamespace,
912 my_containedRels);
914 else
916 /* CROSS JOIN: no quals */
919 /* Add remaining columns from each side to the output columns */
920 extractRemainingColumns(res_colnames,
921 l_colnames, l_colvars,
922 &l_colnames, &l_colvars);
923 extractRemainingColumns(res_colnames,
924 r_colnames, r_colvars,
925 &r_colnames, &r_colvars);
926 res_colnames = list_concat(res_colnames, l_colnames);
927 res_colvars = list_concat(res_colvars, l_colvars);
928 res_colnames = list_concat(res_colnames, r_colnames);
929 res_colvars = list_concat(res_colvars, r_colvars);
932 * Check alias (AS clause), if any.
934 if (j->alias)
936 if (j->alias->colnames != NIL)
938 if (list_length(j->alias->colnames) > list_length(res_colnames))
939 ereport(ERROR,
940 (errcode(ERRCODE_SYNTAX_ERROR),
941 errmsg("column alias list for \"%s\" has too many entries",
942 j->alias->aliasname)));
947 * Now build an RTE for the result of the join
949 rte = addRangeTableEntryForJoin(pstate,
950 res_colnames,
951 j->jointype,
952 res_colvars,
953 j->alias,
954 true);
956 /* assume new rte is at end */
957 j->rtindex = list_length(pstate->p_rtable);
958 Assert(rte == rt_fetch(j->rtindex, pstate->p_rtable));
960 *top_rte = rte;
961 *top_rti = j->rtindex;
964 * Prepare returned namespace list. If the JOIN has an alias then it
965 * hides the contained RTEs as far as the relnamespace goes;
966 * otherwise, put the contained RTEs and *not* the JOIN into
967 * relnamespace.
969 if (j->alias)
971 *relnamespace = list_make1(rte);
972 list_free(my_relnamespace);
974 else
975 *relnamespace = my_relnamespace;
978 * Include join RTE in returned containedRels set
980 *containedRels = bms_add_member(my_containedRels, j->rtindex);
982 return (Node *) j;
984 else
985 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
986 return NULL; /* can't get here, keep compiler quiet */
990 * buildMergedJoinVar -
991 * generate a suitable replacement expression for a merged join column
993 static Node *
994 buildMergedJoinVar(ParseState *pstate, JoinType jointype,
995 Var *l_colvar, Var *r_colvar)
997 Oid outcoltype;
998 int32 outcoltypmod;
999 Node *l_node,
1000 *r_node,
1001 *res_node;
1004 * Choose output type if input types are dissimilar.
1006 outcoltype = l_colvar->vartype;
1007 outcoltypmod = l_colvar->vartypmod;
1008 if (outcoltype != r_colvar->vartype)
1010 outcoltype = select_common_type(pstate,
1011 list_make2(l_colvar, r_colvar),
1012 "JOIN/USING",
1013 NULL);
1014 outcoltypmod = -1; /* ie, unknown */
1016 else if (outcoltypmod != r_colvar->vartypmod)
1018 /* same type, but not same typmod */
1019 outcoltypmod = -1; /* ie, unknown */
1023 * Insert coercion functions if needed. Note that a difference in typmod
1024 * can only happen if input has typmod but outcoltypmod is -1. In that
1025 * case we insert a RelabelType to clearly mark that result's typmod is
1026 * not same as input. We never need coerce_type_typmod.
1028 if (l_colvar->vartype != outcoltype)
1029 l_node = coerce_type(pstate, (Node *) l_colvar, l_colvar->vartype,
1030 outcoltype, outcoltypmod,
1031 COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1);
1032 else if (l_colvar->vartypmod != outcoltypmod)
1033 l_node = (Node *) makeRelabelType((Expr *) l_colvar,
1034 outcoltype, outcoltypmod,
1035 COERCE_IMPLICIT_CAST);
1036 else
1037 l_node = (Node *) l_colvar;
1039 if (r_colvar->vartype != outcoltype)
1040 r_node = coerce_type(pstate, (Node *) r_colvar, r_colvar->vartype,
1041 outcoltype, outcoltypmod,
1042 COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1);
1043 else if (r_colvar->vartypmod != outcoltypmod)
1044 r_node = (Node *) makeRelabelType((Expr *) r_colvar,
1045 outcoltype, outcoltypmod,
1046 COERCE_IMPLICIT_CAST);
1047 else
1048 r_node = (Node *) r_colvar;
1051 * Choose what to emit
1053 switch (jointype)
1055 case JOIN_INNER:
1058 * We can use either var; prefer non-coerced one if available.
1060 if (IsA(l_node, Var))
1061 res_node = l_node;
1062 else if (IsA(r_node, Var))
1063 res_node = r_node;
1064 else
1065 res_node = l_node;
1066 break;
1067 case JOIN_LEFT:
1068 /* Always use left var */
1069 res_node = l_node;
1070 break;
1071 case JOIN_RIGHT:
1072 /* Always use right var */
1073 res_node = r_node;
1074 break;
1075 case JOIN_FULL:
1078 * Here we must build a COALESCE expression to ensure that the
1079 * join output is non-null if either input is.
1081 CoalesceExpr *c = makeNode(CoalesceExpr);
1083 c->coalescetype = outcoltype;
1084 c->args = list_make2(l_node, r_node);
1085 c->location = -1;
1086 res_node = (Node *) c;
1087 break;
1089 default:
1090 elog(ERROR, "unrecognized join type: %d", (int) jointype);
1091 res_node = NULL; /* keep compiler quiet */
1092 break;
1095 return res_node;
1100 * transformWhereClause -
1101 * Transform the qualification and make sure it is of type boolean.
1102 * Used for WHERE and allied clauses.
1104 * constructName does not affect the semantics, but is used in error messages
1106 Node *
1107 transformWhereClause(ParseState *pstate, Node *clause,
1108 const char *constructName)
1110 Node *qual;
1112 if (clause == NULL)
1113 return NULL;
1115 qual = transformExpr(pstate, clause);
1117 qual = coerce_to_boolean(pstate, qual, constructName);
1119 return qual;
1124 * transformLimitClause -
1125 * Transform the expression and make sure it is of type bigint.
1126 * Used for LIMIT and allied clauses.
1128 * Note: as of Postgres 8.2, LIMIT expressions are expected to yield int8,
1129 * rather than int4 as before.
1131 * constructName does not affect the semantics, but is used in error messages
1133 Node *
1134 transformLimitClause(ParseState *pstate, Node *clause,
1135 const char *constructName)
1137 Node *qual;
1139 if (clause == NULL)
1140 return NULL;
1142 qual = transformExpr(pstate, clause);
1144 qual = coerce_to_specific_type(pstate, qual, INT8OID, constructName);
1147 * LIMIT can't refer to any vars or aggregates of the current query
1149 if (contain_vars_of_level(qual, 0))
1151 ereport(ERROR,
1152 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1153 /* translator: %s is name of a SQL construct, eg LIMIT */
1154 errmsg("argument of %s must not contain variables",
1155 constructName),
1156 parser_errposition(pstate,
1157 locate_var_of_level(qual, 0))));
1159 if (checkExprHasAggs(qual))
1161 ereport(ERROR,
1162 (errcode(ERRCODE_GROUPING_ERROR),
1163 /* translator: %s is name of a SQL construct, eg LIMIT */
1164 errmsg("argument of %s must not contain aggregates",
1165 constructName),
1166 parser_errposition(pstate,
1167 locate_agg_of_level(qual, 0))));
1170 return qual;
1175 * findTargetlistEntry -
1176 * Returns the targetlist entry matching the given (untransformed) node.
1177 * If no matching entry exists, one is created and appended to the target
1178 * list as a "resjunk" node.
1180 * node the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
1181 * tlist the target list (passed by reference so we can append to it)
1182 * clause identifies clause type being processed
1184 static TargetEntry *
1185 findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
1187 TargetEntry *target_result = NULL;
1188 ListCell *tl;
1189 Node *expr;
1191 /*----------
1192 * Handle two special cases as mandated by the SQL92 spec:
1194 * 1. Bare ColumnName (no qualifier or subscripts)
1195 * For a bare identifier, we search for a matching column name
1196 * in the existing target list. Multiple matches are an error
1197 * unless they refer to identical values; for example,
1198 * we allow SELECT a, a FROM table ORDER BY a
1199 * but not SELECT a AS b, b FROM table ORDER BY b
1200 * If no match is found, we fall through and treat the identifier
1201 * as an expression.
1202 * For GROUP BY, it is incorrect to match the grouping item against
1203 * targetlist entries: according to SQL92, an identifier in GROUP BY
1204 * is a reference to a column name exposed by FROM, not to a target
1205 * list column. However, many implementations (including pre-7.0
1206 * PostgreSQL) accept this anyway. So for GROUP BY, we look first
1207 * to see if the identifier matches any FROM column name, and only
1208 * try for a targetlist name if it doesn't. This ensures that we
1209 * adhere to the spec in the case where the name could be both.
1210 * DISTINCT ON isn't in the standard, so we can do what we like there;
1211 * we choose to make it work like ORDER BY, on the rather flimsy
1212 * grounds that ordinary DISTINCT works on targetlist entries.
1214 * 2. IntegerConstant
1215 * This means to use the n'th item in the existing target list.
1216 * Note that it would make no sense to order/group/distinct by an
1217 * actual constant, so this does not create a conflict with our
1218 * extension to order/group by an expression.
1219 * GROUP BY column-number is not allowed by SQL92, but since
1220 * the standard has no other behavior defined for this syntax,
1221 * we may as well accept this common extension.
1223 * Note that pre-existing resjunk targets must not be used in either case,
1224 * since the user didn't write them in his SELECT list.
1226 * If neither special case applies, fall through to treat the item as
1227 * an expression.
1228 *----------
1230 if (IsA(node, ColumnRef) &&
1231 list_length(((ColumnRef *) node)->fields) == 1 &&
1232 IsA(linitial(((ColumnRef *) node)->fields), String))
1234 char *name = strVal(linitial(((ColumnRef *) node)->fields));
1235 int location = ((ColumnRef *) node)->location;
1237 if (clause == GROUP_CLAUSE)
1240 * In GROUP BY, we must prefer a match against a FROM-clause
1241 * column to one against the targetlist. Look to see if there is
1242 * a matching column. If so, fall through to let transformExpr()
1243 * do the rest. NOTE: if name could refer ambiguously to more
1244 * than one column name exposed by FROM, colNameToVar will
1245 * ereport(ERROR). That's just what we want here.
1247 * Small tweak for 7.4.3: ignore matches in upper query levels.
1248 * This effectively changes the search order for bare names to (1)
1249 * local FROM variables, (2) local targetlist aliases, (3) outer
1250 * FROM variables, whereas before it was (1) (3) (2). SQL92 and
1251 * SQL99 do not allow GROUPing BY an outer reference, so this
1252 * breaks no cases that are legal per spec, and it seems a more
1253 * self-consistent behavior.
1255 if (colNameToVar(pstate, name, true, location) != NULL)
1256 name = NULL;
1259 if (name != NULL)
1261 foreach(tl, *tlist)
1263 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1265 if (!tle->resjunk &&
1266 strcmp(tle->resname, name) == 0)
1268 if (target_result != NULL)
1270 if (!equal(target_result->expr, tle->expr))
1271 ereport(ERROR,
1272 (errcode(ERRCODE_AMBIGUOUS_COLUMN),
1274 /*------
1275 translator: first %s is name of a SQL construct, eg ORDER BY */
1276 errmsg("%s \"%s\" is ambiguous",
1277 clauseText[clause], name),
1278 parser_errposition(pstate, location)));
1280 else
1281 target_result = tle;
1282 /* Stay in loop to check for ambiguity */
1285 if (target_result != NULL)
1286 return target_result; /* return the first match */
1289 if (IsA(node, A_Const))
1291 Value *val = &((A_Const *) node)->val;
1292 int location = ((A_Const *) node)->location;
1293 int targetlist_pos = 0;
1294 int target_pos;
1296 if (!IsA(val, Integer))
1297 ereport(ERROR,
1298 (errcode(ERRCODE_SYNTAX_ERROR),
1299 /* translator: %s is name of a SQL construct, eg ORDER BY */
1300 errmsg("non-integer constant in %s",
1301 clauseText[clause]),
1302 parser_errposition(pstate, location)));
1304 target_pos = intVal(val);
1305 foreach(tl, *tlist)
1307 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1309 if (!tle->resjunk)
1311 if (++targetlist_pos == target_pos)
1312 return tle; /* return the unique match */
1315 ereport(ERROR,
1316 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1317 /* translator: %s is name of a SQL construct, eg ORDER BY */
1318 errmsg("%s position %d is not in select list",
1319 clauseText[clause], target_pos),
1320 parser_errposition(pstate, location)));
1324 * Otherwise, we have an expression (this is a Postgres extension not
1325 * found in SQL92). Convert the untransformed node to a transformed
1326 * expression, and search for a match in the tlist. NOTE: it doesn't
1327 * really matter whether there is more than one match. Also, we are
1328 * willing to match a resjunk target here, though the above cases must
1329 * ignore resjunk targets.
1331 expr = transformExpr(pstate, node);
1333 foreach(tl, *tlist)
1335 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1337 if (equal(expr, tle->expr))
1338 return tle;
1342 * If no matches, construct a new target entry which is appended to the
1343 * end of the target list. This target is given resjunk = TRUE so that it
1344 * will not be projected into the final tuple.
1346 target_result = transformTargetEntry(pstate, node, expr, NULL, true);
1348 *tlist = lappend(*tlist, target_result);
1350 return target_result;
1354 * transformGroupClause -
1355 * transform a GROUP BY clause
1357 * GROUP BY items will be added to the targetlist (as resjunk columns)
1358 * if not already present, so the targetlist must be passed by reference.
1360 List *
1361 transformGroupClause(ParseState *pstate, List *grouplist,
1362 List **targetlist, List *sortClause)
1364 List *result = NIL;
1365 ListCell *gl;
1367 foreach(gl, grouplist)
1369 Node *gexpr = (Node *) lfirst(gl);
1370 TargetEntry *tle;
1371 bool found = false;
1373 tle = findTargetlistEntry(pstate, gexpr,
1374 targetlist, GROUP_CLAUSE);
1376 /* Eliminate duplicates (GROUP BY x, x) */
1377 if (targetIsInSortList(tle, InvalidOid, result))
1378 continue;
1381 * If the GROUP BY tlist entry also appears in ORDER BY, copy operator
1382 * info from the (first) matching ORDER BY item. This means that if
1383 * you write something like "GROUP BY foo ORDER BY foo USING <<<", the
1384 * GROUP BY operation silently takes on the equality semantics implied
1385 * by the ORDER BY. There are two reasons to do this: it improves
1386 * the odds that we can implement both GROUP BY and ORDER BY with a
1387 * single sort step, and it allows the user to choose the equality
1388 * semantics used by GROUP BY, should she be working with a datatype
1389 * that has more than one equality operator.
1391 if (tle->ressortgroupref > 0)
1393 ListCell *sl;
1395 foreach(sl, sortClause)
1397 SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
1399 if (sc->tleSortGroupRef == tle->ressortgroupref)
1401 result = lappend(result, copyObject(sc));
1402 found = true;
1403 break;
1409 * If no match in ORDER BY, just add it to the result using
1410 * default sort/group semantics.
1412 if (!found)
1413 result = addTargetToGroupList(pstate, tle,
1414 result, *targetlist,
1415 exprLocation(gexpr),
1416 true);
1419 return result;
1423 * transformSortClause -
1424 * transform an ORDER BY clause
1426 * ORDER BY items will be added to the targetlist (as resjunk columns)
1427 * if not already present, so the targetlist must be passed by reference.
1429 List *
1430 transformSortClause(ParseState *pstate,
1431 List *orderlist,
1432 List **targetlist,
1433 bool resolveUnknown)
1435 List *sortlist = NIL;
1436 ListCell *olitem;
1438 foreach(olitem, orderlist)
1440 SortBy *sortby = (SortBy *) lfirst(olitem);
1441 TargetEntry *tle;
1443 tle = findTargetlistEntry(pstate, sortby->node,
1444 targetlist, ORDER_CLAUSE);
1446 sortlist = addTargetToSortList(pstate, tle,
1447 sortlist, *targetlist, sortby,
1448 resolveUnknown);
1451 return sortlist;
1455 * transformDistinctClause -
1456 * transform a DISTINCT clause
1458 * Since we may need to add items to the query's targetlist, that list
1459 * is passed by reference.
1461 * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as
1462 * possible into the distinctClause. This avoids a possible need to re-sort,
1463 * and allows the user to choose the equality semantics used by DISTINCT,
1464 * should she be working with a datatype that has more than one equality
1465 * operator.
1467 List *
1468 transformDistinctClause(ParseState *pstate,
1469 List **targetlist, List *sortClause)
1471 List *result = NIL;
1472 ListCell *slitem;
1473 ListCell *tlitem;
1476 * The distinctClause should consist of all ORDER BY items followed
1477 * by all other non-resjunk targetlist items. There must not be any
1478 * resjunk ORDER BY items --- that would imply that we are sorting
1479 * by a value that isn't necessarily unique within a DISTINCT group,
1480 * so the results wouldn't be well-defined. This construction
1481 * ensures we follow the rule that sortClause and distinctClause match;
1482 * in fact the sortClause will always be a prefix of distinctClause.
1484 * Note a corner case: the same TLE could be in the ORDER BY list
1485 * multiple times with different sortops. We have to include it in
1486 * the distinctClause the same way to preserve the prefix property.
1487 * The net effect will be that the TLE value will be made unique
1488 * according to both sortops.
1490 foreach(slitem, sortClause)
1492 SortGroupClause *scl = (SortGroupClause *) lfirst(slitem);
1493 TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist);
1495 if (tle->resjunk)
1496 ereport(ERROR,
1497 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1498 errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list"),
1499 parser_errposition(pstate,
1500 exprLocation((Node *) tle->expr))));
1501 result = lappend(result, copyObject(scl));
1505 * Now add any remaining non-resjunk tlist items, using default
1506 * sort/group semantics for their data types.
1508 foreach(tlitem, *targetlist)
1510 TargetEntry *tle = (TargetEntry *) lfirst(tlitem);
1512 if (tle->resjunk)
1513 continue; /* ignore junk */
1514 result = addTargetToGroupList(pstate, tle,
1515 result, *targetlist,
1516 exprLocation((Node *) tle->expr),
1517 true);
1520 return result;
1524 * transformDistinctOnClause -
1525 * transform a DISTINCT ON clause
1527 * Since we may need to add items to the query's targetlist, that list
1528 * is passed by reference.
1530 * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as
1531 * possible into the distinctClause. This avoids a possible need to re-sort,
1532 * and allows the user to choose the equality semantics used by DISTINCT,
1533 * should she be working with a datatype that has more than one equality
1534 * operator.
1536 List *
1537 transformDistinctOnClause(ParseState *pstate, List *distinctlist,
1538 List **targetlist, List *sortClause)
1540 List *result = NIL;
1541 List *sortgrouprefs = NIL;
1542 bool skipped_sortitem;
1543 ListCell *lc;
1544 ListCell *lc2;
1547 * Add all the DISTINCT ON expressions to the tlist (if not already
1548 * present, they are added as resjunk items). Assign sortgroupref
1549 * numbers to them, and make a list of these numbers. (NB: we rely
1550 * below on the sortgrouprefs list being one-for-one with the original
1551 * distinctlist. Also notice that we could have duplicate DISTINCT ON
1552 * expressions and hence duplicate entries in sortgrouprefs.)
1554 foreach(lc, distinctlist)
1556 Node *dexpr = (Node *) lfirst(lc);
1557 int sortgroupref;
1558 TargetEntry *tle;
1560 tle = findTargetlistEntry(pstate, dexpr,
1561 targetlist, DISTINCT_ON_CLAUSE);
1562 sortgroupref = assignSortGroupRef(tle, *targetlist);
1563 sortgrouprefs = lappend_int(sortgrouprefs, sortgroupref);
1567 * If the user writes both DISTINCT ON and ORDER BY, adopt the
1568 * sorting semantics from ORDER BY items that match DISTINCT ON
1569 * items, and also adopt their column sort order. We insist that
1570 * the distinctClause and sortClause match, so throw error if we
1571 * find the need to add any more distinctClause items after we've
1572 * skipped an ORDER BY item that wasn't in DISTINCT ON.
1574 skipped_sortitem = false;
1575 foreach(lc, sortClause)
1577 SortGroupClause *scl = (SortGroupClause *) lfirst(lc);
1579 if (list_member_int(sortgrouprefs, scl->tleSortGroupRef))
1581 if (skipped_sortitem)
1582 ereport(ERROR,
1583 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1584 errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"),
1585 parser_errposition(pstate,
1586 get_matching_location(scl->tleSortGroupRef,
1587 sortgrouprefs,
1588 distinctlist))));
1589 else
1590 result = lappend(result, copyObject(scl));
1592 else
1593 skipped_sortitem = true;
1597 * Now add any remaining DISTINCT ON items, using default sort/group
1598 * semantics for their data types. (Note: this is pretty questionable;
1599 * if the ORDER BY list doesn't include all the DISTINCT ON items and more
1600 * besides, you certainly aren't using DISTINCT ON in the intended way,
1601 * and you probably aren't going to get consistent results. It might be
1602 * better to throw an error or warning here. But historically we've
1603 * allowed it, so keep doing so.)
1605 forboth(lc, distinctlist, lc2, sortgrouprefs)
1607 Node *dexpr = (Node *) lfirst(lc);
1608 int sortgroupref = lfirst_int(lc2);
1609 TargetEntry *tle = get_sortgroupref_tle(sortgroupref, *targetlist);
1611 if (targetIsInSortList(tle, InvalidOid, result))
1612 continue; /* already in list (with some semantics) */
1613 if (skipped_sortitem)
1614 ereport(ERROR,
1615 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1616 errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"),
1617 parser_errposition(pstate, exprLocation(dexpr))));
1618 result = addTargetToGroupList(pstate, tle,
1619 result, *targetlist,
1620 exprLocation(dexpr),
1621 true);
1624 return result;
1628 * get_matching_location
1629 * Get the exprLocation of the exprs member corresponding to the
1630 * (first) member of sortgrouprefs that equals sortgroupref.
1632 * This is used so that we can point at a troublesome DISTINCT ON entry.
1633 * (Note that we need to use the original untransformed DISTINCT ON list
1634 * item, as whatever TLE it corresponds to will very possibly have a
1635 * parse location pointing to some matching entry in the SELECT list
1636 * or ORDER BY list.)
1638 static int
1639 get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs)
1641 ListCell *lcs;
1642 ListCell *lce;
1644 forboth(lcs, sortgrouprefs, lce, exprs)
1646 if (lfirst_int(lcs) == sortgroupref)
1647 return exprLocation((Node *) lfirst(lce));
1649 /* if no match, caller blew it */
1650 elog(ERROR, "get_matching_location: no matching sortgroupref");
1651 return -1; /* keep compiler quiet */
1655 * addTargetToSortList
1656 * If the given targetlist entry isn't already in the SortGroupClause
1657 * list, add it to the end of the list, using the given sort ordering
1658 * info.
1660 * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT. If not,
1661 * do nothing (which implies the search for a sort operator will fail).
1662 * pstate should be provided if resolveUnknown is TRUE, but can be NULL
1663 * otherwise.
1665 * Returns the updated SortGroupClause list.
1667 static List *
1668 addTargetToSortList(ParseState *pstate, TargetEntry *tle,
1669 List *sortlist, List *targetlist, SortBy *sortby,
1670 bool resolveUnknown)
1672 Oid restype = exprType((Node *) tle->expr);
1673 Oid sortop;
1674 Oid eqop;
1675 bool reverse;
1676 int location;
1677 ParseCallbackState pcbstate;
1679 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1680 if (restype == UNKNOWNOID && resolveUnknown)
1682 tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1683 restype, TEXTOID, -1,
1684 COERCION_IMPLICIT,
1685 COERCE_IMPLICIT_CAST,
1686 -1);
1687 restype = TEXTOID;
1691 * Rather than clutter the API of get_sort_group_operators and the other
1692 * functions we're about to use, make use of error context callback to
1693 * mark any error reports with a parse position. We point to the operator
1694 * location if present, else to the expression being sorted. (NB: use
1695 * the original untransformed expression here; the TLE entry might well
1696 * point at a duplicate expression in the regular SELECT list.)
1698 location = sortby->location;
1699 if (location < 0)
1700 location = exprLocation(sortby->node);
1701 setup_parser_errposition_callback(&pcbstate, pstate, location);
1703 /* determine the sortop, eqop, and directionality */
1704 switch (sortby->sortby_dir)
1706 case SORTBY_DEFAULT:
1707 case SORTBY_ASC:
1708 get_sort_group_operators(restype,
1709 true, true, false,
1710 &sortop, &eqop, NULL);
1711 reverse = false;
1712 break;
1713 case SORTBY_DESC:
1714 get_sort_group_operators(restype,
1715 false, true, true,
1716 NULL, &eqop, &sortop);
1717 reverse = true;
1718 break;
1719 case SORTBY_USING:
1720 Assert(sortby->useOp != NIL);
1721 sortop = compatible_oper_opid(sortby->useOp,
1722 restype,
1723 restype,
1724 false);
1727 * Verify it's a valid ordering operator, fetch the corresponding
1728 * equality operator, and determine whether to consider it like
1729 * ASC or DESC.
1731 eqop = get_equality_op_for_ordering_op(sortop, &reverse);
1732 if (!OidIsValid(eqop))
1733 ereport(ERROR,
1734 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1735 errmsg("operator %s is not a valid ordering operator",
1736 strVal(llast(sortby->useOp))),
1737 errhint("Ordering operators must be \"<\" or \">\" members of btree operator families.")));
1738 break;
1739 default:
1740 elog(ERROR, "unrecognized sortby_dir: %d", sortby->sortby_dir);
1741 sortop = InvalidOid; /* keep compiler quiet */
1742 eqop = InvalidOid;
1743 reverse = false;
1744 break;
1747 cancel_parser_errposition_callback(&pcbstate);
1749 /* avoid making duplicate sortlist entries */
1750 if (!targetIsInSortList(tle, sortop, sortlist))
1752 SortGroupClause *sortcl = makeNode(SortGroupClause);
1754 sortcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1756 sortcl->eqop = eqop;
1757 sortcl->sortop = sortop;
1759 switch (sortby->sortby_nulls)
1761 case SORTBY_NULLS_DEFAULT:
1762 /* NULLS FIRST is default for DESC; other way for ASC */
1763 sortcl->nulls_first = reverse;
1764 break;
1765 case SORTBY_NULLS_FIRST:
1766 sortcl->nulls_first = true;
1767 break;
1768 case SORTBY_NULLS_LAST:
1769 sortcl->nulls_first = false;
1770 break;
1771 default:
1772 elog(ERROR, "unrecognized sortby_nulls: %d",
1773 sortby->sortby_nulls);
1774 break;
1777 sortlist = lappend(sortlist, sortcl);
1780 return sortlist;
1784 * addTargetToGroupList
1785 * If the given targetlist entry isn't already in the SortGroupClause
1786 * list, add it to the end of the list, using default sort/group
1787 * semantics.
1789 * This is very similar to addTargetToSortList, except that we allow the
1790 * case where only a grouping (equality) operator can be found, and that
1791 * the TLE is considered "already in the list" if it appears there with any
1792 * sorting semantics.
1794 * location is the parse location to be fingered in event of trouble. Note
1795 * that we can't rely on exprLocation(tle->expr), because that might point
1796 * to a SELECT item that matches the GROUP BY item; it'd be pretty confusing
1797 * to report such a location.
1799 * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT. If not,
1800 * do nothing (which implies the search for an equality operator will fail).
1801 * pstate should be provided if resolveUnknown is TRUE, but can be NULL
1802 * otherwise.
1804 * Returns the updated SortGroupClause list.
1806 static List *
1807 addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
1808 List *grouplist, List *targetlist, int location,
1809 bool resolveUnknown)
1811 Oid restype = exprType((Node *) tle->expr);
1812 Oid sortop;
1813 Oid eqop;
1815 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1816 if (restype == UNKNOWNOID && resolveUnknown)
1818 tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1819 restype, TEXTOID, -1,
1820 COERCION_IMPLICIT,
1821 COERCE_IMPLICIT_CAST,
1822 -1);
1823 restype = TEXTOID;
1826 /* avoid making duplicate grouplist entries */
1827 if (!targetIsInSortList(tle, InvalidOid, grouplist))
1829 SortGroupClause *grpcl = makeNode(SortGroupClause);
1830 ParseCallbackState pcbstate;
1832 setup_parser_errposition_callback(&pcbstate, pstate, location);
1834 /* determine the eqop and optional sortop */
1835 get_sort_group_operators(restype,
1836 false, true, false,
1837 &sortop, &eqop, NULL);
1839 cancel_parser_errposition_callback(&pcbstate);
1841 grpcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1842 grpcl->eqop = eqop;
1843 grpcl->sortop = sortop;
1844 grpcl->nulls_first = false; /* OK with or without sortop */
1846 grouplist = lappend(grouplist, grpcl);
1849 return grouplist;
1853 * assignSortGroupRef
1854 * Assign the targetentry an unused ressortgroupref, if it doesn't
1855 * already have one. Return the assigned or pre-existing refnumber.
1857 * 'tlist' is the targetlist containing (or to contain) the given targetentry.
1859 Index
1860 assignSortGroupRef(TargetEntry *tle, List *tlist)
1862 Index maxRef;
1863 ListCell *l;
1865 if (tle->ressortgroupref) /* already has one? */
1866 return tle->ressortgroupref;
1868 /* easiest way to pick an unused refnumber: max used + 1 */
1869 maxRef = 0;
1870 foreach(l, tlist)
1872 Index ref = ((TargetEntry *) lfirst(l))->ressortgroupref;
1874 if (ref > maxRef)
1875 maxRef = ref;
1877 tle->ressortgroupref = maxRef + 1;
1878 return tle->ressortgroupref;
1882 * targetIsInSortList
1883 * Is the given target item already in the sortlist?
1884 * If sortop is not InvalidOid, also test for a match to the sortop.
1886 * It is not an oversight that this function ignores the nulls_first flag.
1887 * We check sortop when determining if an ORDER BY item is redundant with
1888 * earlier ORDER BY items, because it's conceivable that "ORDER BY
1889 * foo USING <, foo USING <<<" is not redundant, if <<< distinguishes
1890 * values that < considers equal. We need not check nulls_first
1891 * however, because a lower-order column with the same sortop but
1892 * opposite nulls direction is redundant. Also, we can consider
1893 * ORDER BY foo ASC, foo DESC redundant, so check for a commutator match.
1895 * Works for both ordering and grouping lists (sortop would normally be
1896 * InvalidOid when considering grouping). Note that the main reason we need
1897 * this routine (and not just a quick test for nonzeroness of ressortgroupref)
1898 * is that a TLE might be in only one of the lists.
1900 bool
1901 targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList)
1903 Index ref = tle->ressortgroupref;
1904 ListCell *l;
1906 /* no need to scan list if tle has no marker */
1907 if (ref == 0)
1908 return false;
1910 foreach(l, sortList)
1912 SortGroupClause *scl = (SortGroupClause *) lfirst(l);
1914 if (scl->tleSortGroupRef == ref &&
1915 (sortop == InvalidOid ||
1916 sortop == scl->sortop ||
1917 sortop == get_commutator(scl->sortop)))
1918 return true;
1920 return false;