1 /*-------------------------------------------------------------------------
3 * index.c
4 * code to create and destroy POSTGRES index relations
6 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * $PostgreSQL$
14 * INTERFACE ROUTINES
15 * index_create() - Create a cataloged index relation
16 * index_drop() - Removes index relation from catalogs
17 * BuildIndexInfo() - Prepare to insert index tuples
18 * FormIndexDatum() - Construct datum vector for one index tuple
20 *-------------------------------------------------------------------------
22 #include "postgres.h"
24 #include <unistd.h>
26 #include "access/genam.h"
27 #include "access/heapam.h"
28 #include "access/relscan.h"
29 #include "access/sysattr.h"
30 #include "access/transam.h"
31 #include "access/xact.h"
32 #include "bootstrap/bootstrap.h"
33 #include "catalog/catalog.h"
34 #include "catalog/dependency.h"
35 #include "catalog/heap.h"
36 #include "catalog/index.h"
37 #include "catalog/indexing.h"
38 #include "catalog/namespace.h"
39 #include "catalog/pg_constraint.h"
40 #include "catalog/pg_operator.h"
41 #include "catalog/pg_opclass.h"
42 #include "catalog/pg_tablespace.h"
43 #include "catalog/pg_type.h"
44 #include "catalog/storage.h"
45 #include "commands/tablecmds.h"
46 #include "executor/executor.h"
47 #include "miscadmin.h"
48 #include "nodes/nodeFuncs.h"
49 #include "optimizer/clauses.h"
50 #include "optimizer/var.h"
51 #include "storage/bufmgr.h"
52 #include "storage/lmgr.h"
53 #include "storage/procarray.h"
54 #include "storage/smgr.h"
55 #include "utils/builtins.h"
56 #include "utils/fmgroids.h"
57 #include "utils/inval.h"
58 #include "utils/lsyscache.h"
59 #include "utils/memutils.h"
60 #include "utils/relcache.h"
61 #include "utils/syscache.h"
62 #include "utils/tuplesort.h"
63 #include "utils/snapmgr.h"
64 #include "utils/tqual.h"
67 /* state info for validate_index bulkdelete callback */
68 typedef struct
70 Tuplesortstate *tuplesort; /* for sorting the index TIDs */
71 /* statistics (for debug purposes only): */
72 double htups,
73 itups,
74 tups_inserted;
75 } v_i_state;
77 /* non-export function prototypes */
78 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
79 IndexInfo *indexInfo,
80 Oid accessMethodObjectId,
81 Oid *classObjectId);
82 static void InitializeAttributeOids(Relation indexRelation,
83 int numatts, Oid indexoid);
84 static void AppendAttributeTuples(Relation indexRelation, int numatts);
85 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
86 IndexInfo *indexInfo,
87 Oid *classOids,
88 int16 *coloptions,
89 bool primary,
90 bool isvalid);
91 static void index_update_stats(Relation rel, bool hasindex, bool isprimary,
92 Oid reltoastidxid, double reltuples);
93 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
94 static void validate_index_heapscan(Relation heapRelation,
95 Relation indexRelation,
96 IndexInfo *indexInfo,
97 Snapshot snapshot,
98 v_i_state *state);
99 static Oid IndexGetRelation(Oid indexId);
103 * ConstructTupleDescriptor
105 * Build an index tuple descriptor for a new index
107 static TupleDesc
108 ConstructTupleDescriptor(Relation heapRelation,
109 IndexInfo *indexInfo,
110 Oid accessMethodObjectId,
111 Oid *classObjectId)
113 int numatts = indexInfo->ii_NumIndexAttrs;
114 ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
115 HeapTuple amtuple;
116 Form_pg_am amform;
117 TupleDesc heapTupDesc;
118 TupleDesc indexTupDesc;
119 int natts; /* #atts in heap rel --- for error checks */
120 int i;
122 /* We need access to the index AM's pg_am tuple */
123 amtuple = SearchSysCache(AMOID,
124 ObjectIdGetDatum(accessMethodObjectId),
125 0, 0, 0);
126 if (!HeapTupleIsValid(amtuple))
127 elog(ERROR, "cache lookup failed for access method %u",
128 accessMethodObjectId);
129 amform = (Form_pg_am) GETSTRUCT(amtuple);
131 /* ... and to the table's tuple descriptor */
132 heapTupDesc = RelationGetDescr(heapRelation);
133 natts = RelationGetForm(heapRelation)->relnatts;
136 * allocate the new tuple descriptor
138 indexTupDesc = CreateTemplateTupleDesc(numatts, false);
141 * For simple index columns, we copy the pg_attribute row from the parent
142 * relation and modify it as necessary. For expressions we have to cons
143 * up a pg_attribute row the hard way.
145 for (i = 0; i < numatts; i++)
147 AttrNumber atnum = indexInfo->ii_KeyAttrNumbers[i];
148 Form_pg_attribute to = indexTupDesc->attrs[i];
149 HeapTuple tuple;
150 Form_pg_type typeTup;
151 Form_pg_opclass opclassTup;
152 Oid keyType;
154 if (atnum != 0)
156 /* Simple index column */
157 Form_pg_attribute from;
159 if (atnum < 0)
162 * here we are indexing on a system attribute (-1...-n)
164 from = SystemAttributeDefinition(atnum,
165 heapRelation->rd_rel->relhasoids);
167 else
170 * here we are indexing on a normal attribute (1...n)
172 if (atnum > natts) /* safety check */
173 elog(ERROR, "invalid column number %d", atnum);
174 from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
178 * now that we've determined the "from", let's copy the tuple desc
179 * data...
181 memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);
184 * Fix the stuff that should not be the same as the underlying
185 * attr
187 to->attnum = i + 1;
189 to->attstattarget = -1;
190 to->attcacheoff = -1;
191 to->attnotnull = false;
192 to->atthasdef = false;
193 to->attislocal = true;
194 to->attinhcount = 0;
196 else
198 /* Expressional index */
199 Node *indexkey;
201 MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
203 if (indexpr_item == NULL) /* shouldn't happen */
204 elog(ERROR, "too few entries in indexprs list");
205 indexkey = (Node *) lfirst(indexpr_item);
206 indexpr_item = lnext(indexpr_item);
209 * Make the attribute's name "pg_expression_nnn" (maybe think of
210 * something better later)
212 sprintf(NameStr(to->attname), "pg_expression_%d", i + 1);
215 * Lookup the expression type in pg_type for the type length etc.
217 keyType = exprType(indexkey);
218 tuple = SearchSysCache(TYPEOID,
219 ObjectIdGetDatum(keyType),
220 0, 0, 0);
221 if (!HeapTupleIsValid(tuple))
222 elog(ERROR, "cache lookup failed for type %u", keyType);
223 typeTup = (Form_pg_type) GETSTRUCT(tuple);
226 * Assign some of the attribute's values. Leave the rest as 0.
228 to->attnum = i + 1;
229 to->atttypid = keyType;
230 to->attlen = typeTup->typlen;
231 to->attbyval = typeTup->typbyval;
232 to->attstorage = typeTup->typstorage;
233 to->attalign = typeTup->typalign;
234 to->attstattarget = -1;
235 to->attcacheoff = -1;
236 to->atttypmod = -1;
237 to->attislocal = true;
239 ReleaseSysCache(tuple);
242 * Make sure the expression yields a type that's safe to store in
243 * an index. We need this defense because we have index opclasses
244 * for pseudo-types such as "record", and the actually stored type
245 * had better be safe; eg, a named composite type is okay, an
246 * anonymous record type is not. The test is the same as for
247 * whether a table column is of a safe type (which is why we
248 * needn't check for the non-expression case).
250 CheckAttributeType(NameStr(to->attname), to->atttypid);
254 * We do not yet have the correct relation OID for the index, so just
255 * set it invalid for now. InitializeAttributeOids() will fix it
256 * later.
258 to->attrelid = InvalidOid;
261 * Check the opclass and index AM to see if either provides a keytype
262 * (overriding the attribute type). Opclass takes precedence.
264 tuple = SearchSysCache(CLAOID,
265 ObjectIdGetDatum(classObjectId[i]),
266 0, 0, 0);
267 if (!HeapTupleIsValid(tuple))
268 elog(ERROR, "cache lookup failed for opclass %u",
269 classObjectId[i]);
270 opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
271 if (OidIsValid(opclassTup->opckeytype))
272 keyType = opclassTup->opckeytype;
273 else
274 keyType = amform->amkeytype;
275 ReleaseSysCache(tuple);
277 if (OidIsValid(keyType) && keyType != to->atttypid)
279 /* index value and heap value have different types */
280 tuple = SearchSysCache(TYPEOID,
281 ObjectIdGetDatum(keyType),
282 0, 0, 0);
283 if (!HeapTupleIsValid(tuple))
284 elog(ERROR, "cache lookup failed for type %u", keyType);
285 typeTup = (Form_pg_type) GETSTRUCT(tuple);
287 to->atttypid = keyType;
288 to->atttypmod = -1;
289 to->attlen = typeTup->typlen;
290 to->attbyval = typeTup->typbyval;
291 to->attalign = typeTup->typalign;
292 to->attstorage = typeTup->typstorage;
294 ReleaseSysCache(tuple);
298 ReleaseSysCache(amtuple);
300 return indexTupDesc;
303 /* ----------------------------------------------------------------
304 * InitializeAttributeOids
305 * ----------------------------------------------------------------
307 static void
308 InitializeAttributeOids(Relation indexRelation,
309 int numatts,
310 Oid indexoid)
312 TupleDesc tupleDescriptor;
313 int i;
315 tupleDescriptor = RelationGetDescr(indexRelation);
317 for (i = 0; i < numatts; i += 1)
318 tupleDescriptor->attrs[i]->attrelid = indexoid;
321 /* ----------------------------------------------------------------
322 * AppendAttributeTuples
323 * ----------------------------------------------------------------
325 static void
326 AppendAttributeTuples(Relation indexRelation, int numatts)
328 Relation pg_attribute;
329 CatalogIndexState indstate;
330 TupleDesc indexTupDesc;
331 int i;
334 * open the attribute relation and its indexes
336 pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
338 indstate = CatalogOpenIndexes(pg_attribute);
341 * insert data from new index's tupdesc into pg_attribute
343 indexTupDesc = RelationGetDescr(indexRelation);
345 for (i = 0; i < numatts; i++)
348 * There used to be very grotty code here to set these fields, but I
349 * think it's unnecessary. They should be set already.
351 Assert(indexTupDesc->attrs[i]->attnum == i + 1);
352 Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
354 InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
357 CatalogCloseIndexes(indstate);
359 heap_close(pg_attribute, RowExclusiveLock);
362 /* ----------------------------------------------------------------
363 * UpdateIndexRelation
365 * Construct and insert a new entry in the pg_index catalog
366 * ----------------------------------------------------------------
368 static void
369 UpdateIndexRelation(Oid indexoid,
370 Oid heapoid,
371 IndexInfo *indexInfo,
372 Oid *classOids,
373 int16 *coloptions,
374 bool primary,
375 bool isvalid)
377 int2vector *indkey;
378 oidvector *indclass;
379 int2vector *indoption;
380 Datum exprsDatum;
381 Datum predDatum;
382 Datum values[Natts_pg_index];
383 bool nulls[Natts_pg_index];
384 Relation pg_index;
385 HeapTuple tuple;
386 int i;
389 * Copy the index key, opclass, and indoption info into arrays (should we
390 * make the caller pass them like this to start with?)
392 indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
393 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
394 indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
395 indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
396 indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
399 * Convert the index expressions (if any) to a text datum
401 if (indexInfo->ii_Expressions != NIL)
403 char *exprsString;
405 exprsString = nodeToString(indexInfo->ii_Expressions);
406 exprsDatum = CStringGetTextDatum(exprsString);
407 pfree(exprsString);
409 else
410 exprsDatum = (Datum) 0;
413 * Convert the index predicate (if any) to a text datum. Note we convert
414 * implicit-AND format to normal explicit-AND for storage.
416 if (indexInfo->ii_Predicate != NIL)
418 char *predString;
420 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
421 predDatum = CStringGetTextDatum(predString);
422 pfree(predString);
424 else
425 predDatum = (Datum) 0;
428 * open the system catalog index relation
430 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
433 * Build a pg_index tuple
435 MemSet(nulls, false, sizeof(nulls));
437 values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
438 values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
439 values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
440 values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
441 values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
442 values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
443 values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
444 values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
445 /* we set isvalid and isready the same way */
446 values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
447 values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
448 values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
449 values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
450 values[Anum_pg_index_indexprs - 1] = exprsDatum;
451 if (exprsDatum == (Datum) 0)
452 nulls[Anum_pg_index_indexprs - 1] = true;
453 values[Anum_pg_index_indpred - 1] = predDatum;
454 if (predDatum == (Datum) 0)
455 nulls[Anum_pg_index_indpred - 1] = true;
457 tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
460 * insert the tuple into the pg_index catalog
462 simple_heap_insert(pg_index, tuple);
464 /* update the indexes on pg_index */
465 CatalogUpdateIndexes(pg_index, tuple);
468 * close the relation and free the tuple
470 heap_close(pg_index, RowExclusiveLock);
471 heap_freetuple(tuple);
476 * index_create
478 * heapRelationId: OID of table to build index on
479 * indexRelationName: what it says
480 * indexRelationId: normally, pass InvalidOid to let this routine
481 * generate an OID for the index. During bootstrap this may be
482 * nonzero to specify a preselected OID.
483 * indexInfo: same info executor uses to insert into the index
484 * accessMethodObjectId: OID of index AM to use
485 * tableSpaceId: OID of tablespace to use
486 * classObjectId: array of index opclass OIDs, one per index column
487 * coloptions: array of per-index-column indoption settings
488 * reloptions: AM-specific options
489 * isprimary: index is a PRIMARY KEY
490 * isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint
491 * allow_system_table_mods: allow table to be a system catalog
492 * skip_build: true to skip the index_build() step for the moment; caller
493 * must do it later (typically via reindex_index())
494 * concurrent: if true, do not lock the table against writers. The index
495 * will be marked "invalid" and the caller must take additional steps
496 * to fix it up.
498 * Returns OID of the created index.
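/*
 * Illustrative sketch (editorial annotation, not part of the original file):
 * a caller such as DefineIndex might invoke this routine roughly as
 *
 *		indexOid = index_create(heapOid, "my_index", InvalidOid, indexInfo,
 *								BTREE_AM_OID, InvalidOid, classOids,
 *								coloptions, (Datum) 0,
 *								false, false, false, false, false);
 *
 * where heapOid, classOids, and coloptions are hypothetical variables the
 * caller has prepared, and the five trailing booleans correspond to
 * isprimary, isconstraint, allow_system_table_mods, skip_build, concurrent.
 */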
501 index_create(Oid heapRelationId,
502 const char *indexRelationName,
503 Oid indexRelationId,
504 IndexInfo *indexInfo,
505 Oid accessMethodObjectId,
506 Oid tableSpaceId,
507 Oid *classObjectId,
508 int16 *coloptions,
509 Datum reloptions,
510 bool isprimary,
511 bool isconstraint,
512 bool allow_system_table_mods,
513 bool skip_build,
514 bool concurrent)
516 Relation pg_class;
517 Relation heapRelation;
518 Relation indexRelation;
519 TupleDesc indexTupDesc;
520 bool shared_relation;
521 Oid namespaceId;
522 int i;
524 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
527 * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
528 * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
529 * (but not VACUUM).
531 heapRelation = heap_open(heapRelationId,
532 (concurrent ? ShareUpdateExclusiveLock : ShareLock));
535 * The index will be in the same namespace as its parent table, and is
536 * shared across databases if and only if the parent is.
538 namespaceId = RelationGetNamespace(heapRelation);
539 shared_relation = heapRelation->rd_rel->relisshared;
542 * check parameters
544 if (indexInfo->ii_NumIndexAttrs < 1)
545 elog(ERROR, "must index at least one column");
547 if (!allow_system_table_mods &&
548 IsSystemRelation(heapRelation) &&
549 IsNormalProcessingMode())
550 ereport(ERROR,
551 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
552 errmsg("user-defined indexes on system catalog tables are not supported")));
555 * concurrent index build on a system catalog is unsafe because we tend to
556 * release locks on system catalogs before committing
558 if (concurrent &&
559 IsSystemRelation(heapRelation))
560 ereport(ERROR,
561 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
562 errmsg("concurrent index creation on system catalog tables is not supported")));
565 * We cannot allow indexing a shared relation after initdb (because
566 * there's no way to make the entry in other databases' pg_class).
568 if (shared_relation && !IsBootstrapProcessingMode())
569 ereport(ERROR,
570 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
571 errmsg("shared indexes cannot be created after initdb")));
574 * Validate shared/non-shared tablespace (must check this before doing
575 * GetNewRelFileNode, to prevent Assert therein)
577 if (shared_relation)
579 if (tableSpaceId != GLOBALTABLESPACE_OID)
580 /* elog since this is not a user-facing error */
581 elog(ERROR,
582 "shared relations must be placed in pg_global tablespace");
584 else
586 if (tableSpaceId == GLOBALTABLESPACE_OID)
587 ereport(ERROR,
588 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
589 errmsg("only shared relations can be placed in pg_global tablespace")));
592 if (get_relname_relid(indexRelationName, namespaceId))
593 ereport(ERROR,
594 (errcode(ERRCODE_DUPLICATE_TABLE),
595 errmsg("relation \"%s\" already exists",
596 indexRelationName)));
599 * construct tuple descriptor for index tuples
601 indexTupDesc = ConstructTupleDescriptor(heapRelation,
602 indexInfo,
603 accessMethodObjectId,
604 classObjectId);
607 * Allocate an OID for the index, unless we were told what to use.
609 * The OID will be the relfilenode as well, so make sure it doesn't
610 * collide with either pg_class OIDs or existing physical files.
612 if (!OidIsValid(indexRelationId))
613 indexRelationId = GetNewRelFileNode(tableSpaceId, shared_relation,
614 pg_class);
617 * create the index relation's relcache entry and physical disk file. (If
618 * we fail further down, it's the smgr's responsibility to remove the disk
619 * file again.)
621 indexRelation = heap_create(indexRelationName,
622 namespaceId,
623 tableSpaceId,
624 indexRelationId,
625 indexTupDesc,
626 RELKIND_INDEX,
627 shared_relation,
628 allow_system_table_mods);
630 Assert(indexRelationId == RelationGetRelid(indexRelation));
633 * Obtain exclusive lock on it. Although no other backends can see it
634 * until we commit, this prevents deadlock-risk complaints from lock
635 * manager in cases such as CLUSTER.
637 LockRelation(indexRelation, AccessExclusiveLock);
640 * Fill in fields of the index's pg_class entry that are not set correctly
641 * by heap_create.
643 * XXX should have a cleaner way to create cataloged indexes
645 indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
646 indexRelation->rd_rel->relam = accessMethodObjectId;
647 indexRelation->rd_rel->relkind = RELKIND_INDEX;
648 indexRelation->rd_rel->relhasoids = false;
651 * store index's pg_class entry
653 InsertPgClassTuple(pg_class, indexRelation,
654 RelationGetRelid(indexRelation),
655 reloptions);
657 /* done with pg_class */
658 heap_close(pg_class, RowExclusiveLock);
661 * now update the object id's of all the attribute tuple forms in the
662 * index relation's tuple descriptor
664 InitializeAttributeOids(indexRelation,
665 indexInfo->ii_NumIndexAttrs,
666 indexRelationId);
669 * append ATTRIBUTE tuples for the index
671 AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
673 /* ----------------
674 * update pg_index
675 * (append INDEX tuple)
677 * Note that this stows away a representation of "predicate".
678 * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
679 * ----------------
681 UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
682 classObjectId, coloptions, isprimary, !concurrent);
685 * Register constraint and dependencies for the index.
687 * If the index is from a CONSTRAINT clause, construct a pg_constraint
688 * entry. The index is then linked to the constraint, which in turn is
689 * linked to the table. If it's not a CONSTRAINT, make the dependency
690 * directly on the table.
692 * We don't need a dependency on the namespace, because there'll be an
693 * indirect dependency via our parent table.
695 * During bootstrap we can't register any dependencies, and we don't try
696 * to make a constraint either.
698 if (!IsBootstrapProcessingMode())
700 ObjectAddress myself,
701 referenced;
703 myself.classId = RelationRelationId;
704 myself.objectId = indexRelationId;
705 myself.objectSubId = 0;
707 if (isconstraint)
709 char constraintType;
710 Oid conOid;
712 if (isprimary)
713 constraintType = CONSTRAINT_PRIMARY;
714 else if (indexInfo->ii_Unique)
715 constraintType = CONSTRAINT_UNIQUE;
716 else
718 elog(ERROR, "constraint must be PRIMARY or UNIQUE");
719 constraintType = 0; /* keep compiler quiet */
722 /* Shouldn't have any expressions */
723 if (indexInfo->ii_Expressions)
724 elog(ERROR, "constraints cannot have index expressions");
726 conOid = CreateConstraintEntry(indexRelationName,
727 namespaceId,
728 constraintType,
729 false, /* isDeferrable */
730 false, /* isDeferred */
731 heapRelationId,
732 indexInfo->ii_KeyAttrNumbers,
733 indexInfo->ii_NumIndexAttrs,
734 InvalidOid, /* no domain */
735 InvalidOid, /* no foreign key */
736 NULL,
737 NULL,
738 NULL,
739 NULL,
741 ' ',
742 ' ',
743 ' ',
744 InvalidOid, /* no associated index */
745 NULL, /* no check constraint */
746 NULL,
747 NULL,
748 true, /* islocal */
749 0); /* inhcount */
751 referenced.classId = ConstraintRelationId;
752 referenced.objectId = conOid;
753 referenced.objectSubId = 0;
755 recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
757 else
759 bool have_simple_col = false;
761 /* Create auto dependencies on simply-referenced columns */
762 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
764 if (indexInfo->ii_KeyAttrNumbers[i] != 0)
766 referenced.classId = RelationRelationId;
767 referenced.objectId = heapRelationId;
768 referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
770 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
772 have_simple_col = true;
777 * It's possible for an index to not depend on any columns of the
778 * table at all, in which case we need to give it a dependency on
779 * the table as a whole; else it won't get dropped when the table
780 * is dropped. This edge case is not totally useless; for
781 * example, a unique index on a constant expression can serve to
782 * prevent a table from containing more than one row.
784 if (!have_simple_col &&
785 !contain_vars_of_level((Node *) indexInfo->ii_Expressions, 0) &&
786 !contain_vars_of_level((Node *) indexInfo->ii_Predicate, 0))
788 referenced.classId = RelationRelationId;
789 referenced.objectId = heapRelationId;
790 referenced.objectSubId = 0;
792 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
796 /* Store dependency on operator classes */
797 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
799 referenced.classId = OperatorClassRelationId;
800 referenced.objectId = classObjectId[i];
801 referenced.objectSubId = 0;
803 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
806 /* Store dependencies on anything mentioned in index expressions */
807 if (indexInfo->ii_Expressions)
809 recordDependencyOnSingleRelExpr(&myself,
810 (Node *) indexInfo->ii_Expressions,
811 heapRelationId,
812 DEPENDENCY_NORMAL,
813 DEPENDENCY_AUTO);
816 /* Store dependencies on anything mentioned in predicate */
817 if (indexInfo->ii_Predicate)
819 recordDependencyOnSingleRelExpr(&myself,
820 (Node *) indexInfo->ii_Predicate,
821 heapRelationId,
822 DEPENDENCY_NORMAL,
823 DEPENDENCY_AUTO);
828 * Advance the command counter so that we can see the newly-entered
829 * catalog tuples for the index.
831 CommandCounterIncrement();
834 * In bootstrap mode, we have to fill in the index strategy structure with
835 * information from the catalogs. If we aren't bootstrapping, then the
836 * relcache entry has already been rebuilt thanks to sinval update during
837 * CommandCounterIncrement.
839 if (IsBootstrapProcessingMode())
840 RelationInitIndexAccessInfo(indexRelation);
841 else
842 Assert(indexRelation->rd_indexcxt != NULL);
845 * If this is bootstrap (initdb) time, then we don't actually fill in the
846 * index yet. We'll be creating more indexes and classes later, so we
847 * delay filling them in until just before we're done with bootstrapping.
848 * Similarly, if the caller specified skip_build then filling the index is
849 * delayed till later (ALTER TABLE can save work in some cases with this).
850 * Otherwise, we call the AM routine that constructs the index.
852 if (IsBootstrapProcessingMode())
854 index_register(heapRelationId, indexRelationId, indexInfo);
856 else if (skip_build)
859 * Caller is responsible for filling the index later on. However,
860 * we'd better make sure that the heap relation is correctly marked as
861 * having an index.
863 index_update_stats(heapRelation,
864 true,
865 isprimary,
866 InvalidOid,
867 heapRelation->rd_rel->reltuples);
868 /* Make the above update visible */
869 CommandCounterIncrement();
871 else
873 index_build(heapRelation, indexRelation, indexInfo, isprimary);
877 * Close the heap and index; but we keep the locks that we acquired above
878 * until end of transaction.
880 index_close(indexRelation, NoLock);
881 heap_close(heapRelation, NoLock);
883 return indexRelationId;
887 * index_drop
889 * NOTE: this routine should now only be called through performDeletion(),
890 * else associated dependencies won't be cleaned up.
892 void
893 index_drop(Oid indexId)
895 Oid heapId;
896 Relation userHeapRelation;
897 Relation userIndexRelation;
898 Relation indexRelation;
899 HeapTuple tuple;
900 bool hasexprs;
903 * To drop an index safely, we must grab exclusive lock on its parent
904 * table. Exclusive lock on the index alone is insufficient because
905 * another backend might be about to execute a query on the parent table.
906 * If it relies on a previously cached list of index OIDs, then it could
907 * attempt to access the just-dropped index. We must therefore take a
908 * table lock strong enough to prevent all queries on the table from
909 * proceeding until we commit and send out a shared-cache-inval notice
910 * that will make them update their index lists.
912 heapId = IndexGetRelation(indexId);
913 userHeapRelation = heap_open(heapId, AccessExclusiveLock);
915 userIndexRelation = index_open(indexId, AccessExclusiveLock);
918 * Schedule physical removal of the files
920 RelationDropStorage(userIndexRelation);
923 * Close and flush the index's relcache entry, to ensure relcache doesn't
924 * try to rebuild it while we're deleting catalog entries. We keep the
925 * lock though.
927 index_close(userIndexRelation, NoLock);
929 RelationForgetRelation(indexId);
932 * fix INDEX relation, and check for expressional index
934 indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
936 tuple = SearchSysCache(INDEXRELID,
937 ObjectIdGetDatum(indexId),
938 0, 0, 0);
939 if (!HeapTupleIsValid(tuple))
940 elog(ERROR, "cache lookup failed for index %u", indexId);
942 hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
944 simple_heap_delete(indexRelation, &tuple->t_self);
946 ReleaseSysCache(tuple);
947 heap_close(indexRelation, RowExclusiveLock);
950 * if it has any expression columns, we might have stored statistics about
951 * them.
953 if (hasexprs)
954 RemoveStatistics(indexId, 0);
957 * fix ATTRIBUTE relation
959 DeleteAttributeTuples(indexId);
962 * fix RELATION relation
964 DeleteRelationTuple(indexId);
967 * We are presently too lazy to attempt to compute the new correct value
968 * of relhasindex (the next VACUUM will fix it if necessary). So there is
969 * no need to update the pg_class tuple for the owning relation. But we
970 * must send out a shared-cache-inval notice on the owning relation to
971 * ensure other backends update their relcache lists of indexes.
973 CacheInvalidateRelcache(userHeapRelation);
976 * Close owning rel, but keep lock
978 heap_close(userHeapRelation, NoLock);
981 /* ----------------------------------------------------------------
982 * index_build support
983 * ----------------------------------------------------------------
986 /* ----------------
987 * BuildIndexInfo
988 * Construct an IndexInfo record for an open index
990 * IndexInfo stores the information about the index that's needed by
991 * FormIndexDatum, which is used for both index_build() and later insertion
992 * of individual index tuples. Normally we build an IndexInfo for an index
993 * just once per command, and then use it for (potentially) many tuples.
994 * ----------------
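/*
 * Illustrative usage (editorial annotation, not part of the original file):
 * validate_index() later in this file follows exactly this pattern,
 *
 *		indexInfo = BuildIndexInfo(indexRelation);
 *
 * building the IndexInfo once and then reusing it for every tuple inserted.
 */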
996 IndexInfo *
997 BuildIndexInfo(Relation index)
999 IndexInfo *ii = makeNode(IndexInfo);
1000 Form_pg_index indexStruct = index->rd_index;
1001 int i;
1002 int numKeys;
1004 /* check the number of keys, and copy attr numbers into the IndexInfo */
1005 numKeys = indexStruct->indnatts;
1006 if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1007 elog(ERROR, "invalid indnatts %d for index %u",
1008 numKeys, RelationGetRelid(index));
1009 ii->ii_NumIndexAttrs = numKeys;
1010 for (i = 0; i < numKeys; i++)
1011 ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1013 /* fetch any expressions needed for expressional indexes */
1014 ii->ii_Expressions = RelationGetIndexExpressions(index);
1015 ii->ii_ExpressionsState = NIL;
1017 /* fetch index predicate if any */
1018 ii->ii_Predicate = RelationGetIndexPredicate(index);
1019 ii->ii_PredicateState = NIL;
1021 /* other info */
1022 ii->ii_Unique = indexStruct->indisunique;
1023 ii->ii_ReadyForInserts = indexStruct->indisready;
1025 /* initialize index-build state to default */
1026 ii->ii_Concurrent = false;
1027 ii->ii_BrokenHotChain = false;
1029 return ii;
1032 /* ----------------
1033 * FormIndexDatum
1034 * Construct values[] and isnull[] arrays for a new index tuple.
1036 * indexInfo Info about the index
1037 * slot Heap tuple for which we must prepare an index entry
1038 * estate executor state for evaluating any index expressions
1039 * values Array of index Datums (output area)
1040 * isnull Array of is-null indicators (output area)
1042 * When there are no index expressions, estate may be NULL. Otherwise it
1043 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
1044 * context must point to the heap tuple passed in.
1046 * Notice we don't actually call index_form_tuple() here; we just prepare
1047 * its input arrays values[] and isnull[]. This is because the index AM
1048 * may wish to alter the data before storage.
1049 * ----------------
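/*
 * Illustrative usage sketch (editorial annotation, not part of the original
 * file): IndexBuildHeapScan() below follows essentially this pattern, with
 * the output areas declared as Datum values[INDEX_MAX_KEYS] and
 * bool isnull[INDEX_MAX_KEYS]:
 *
 *		ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
 *		FormIndexDatum(indexInfo, slot, estate, values, isnull);
 *
 * after which values[]/isnull[] are handed to the index AM (for example via
 * its build callback or index_insert).
 */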
1051 void
1052 FormIndexDatum(IndexInfo *indexInfo,
1053 TupleTableSlot *slot,
1054 EState *estate,
1055 Datum *values,
1056 bool *isnull)
1058 ListCell *indexpr_item;
1059 int i;
1061 if (indexInfo->ii_Expressions != NIL &&
1062 indexInfo->ii_ExpressionsState == NIL)
1064 /* First time through, set up expression evaluation state */
1065 indexInfo->ii_ExpressionsState = (List *)
1066 ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
1067 estate);
1068 /* Check caller has set up context correctly */
1069 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1071 indexpr_item = list_head(indexInfo->ii_ExpressionsState);
1073 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1075 int keycol = indexInfo->ii_KeyAttrNumbers[i];
1076 Datum iDatum;
1077 bool isNull;
1079 if (keycol != 0)
1082 * Plain index column; get the value we need directly from the
1083 * heap tuple.
1085 iDatum = slot_getattr(slot, keycol, &isNull);
1087 else
1090 * Index expression --- need to evaluate it.
1092 if (indexpr_item == NULL)
1093 elog(ERROR, "wrong number of index expressions");
1094 iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1095 GetPerTupleExprContext(estate),
1096 &isNull,
1097 NULL);
1098 indexpr_item = lnext(indexpr_item);
1100 values[i] = iDatum;
1101 isnull[i] = isNull;
1104 if (indexpr_item != NULL)
1105 elog(ERROR, "wrong number of index expressions");
1110 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
1112 * This routine updates the pg_class row of either an index or its parent
1113 * relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed
1114 * to ensure we can do all the necessary work in just one update.
1116 * hasindex: set relhasindex to this value
1117 * isprimary: if true, set relhaspkey true; else no change
1118 * reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
1119 * else no change
1120 * reltuples: set reltuples to this value
1122 * relpages is also updated (using RelationGetNumberOfBlocks()).
1124 * NOTE: an important side-effect of this operation is that an SI invalidation
1125 * message is sent out to all backends --- including me --- causing relcache
1126 * entries to be flushed or updated with the new data. This must happen even
1127 * if we find that no change is needed in the pg_class row. When updating
1128 * a heap entry, this ensures that other backends find out about the new
1129 * index. When updating an index, it's important because some index AMs
1130 * expect a relcache flush to occur after REINDEX.
1132 static void
1133 index_update_stats(Relation rel, bool hasindex, bool isprimary,
1134 Oid reltoastidxid, double reltuples)
1136 BlockNumber relpages = RelationGetNumberOfBlocks(rel);
1137 Oid relid = RelationGetRelid(rel);
1138 Relation pg_class;
1139 HeapTuple tuple;
1140 Form_pg_class rd_rel;
1141 bool dirty;
1144 * We always update the pg_class row using a non-transactional,
1145 * overwrite-in-place update. There are several reasons for this:
1147 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
1149 * 2. We could be reindexing pg_class itself, in which case we can't move
1150 * its pg_class row because CatalogUpdateIndexes might not know about all
1151 * the indexes yet (see reindex_relation).
1153 * 3. Because we execute CREATE INDEX with just share lock on the parent
1154 * rel (to allow concurrent index creations), an ordinary update could
1155 * suffer a tuple-concurrently-updated failure against another CREATE
1156 * INDEX committing at about the same time. We can avoid that by having
1157 * them both do nontransactional updates (we assume they will both be
1158 * trying to change the pg_class row to the same thing, so it doesn't
1159 * matter which goes first).
1161 * 4. Even with just a single CREATE INDEX, there's a risk factor because
1162 * someone else might be trying to open the rel while we commit, and this
1163 * creates a race condition as to whether he will see both or neither of
1164 * the pg_class row versions as valid. Again, a non-transactional update
1165 * avoids the risk. It is indeterminate which state of the row the other
1166 * process will see, but it doesn't matter (if he's only taking
1167 * AccessShareLock, then it's not critical that he see relhasindex true).
1169 * It is safe to use a non-transactional update even though our
1170 * transaction could still fail before committing. Setting relhasindex
1171 * true is safe even if there are no indexes (VACUUM will eventually fix
1172 * it), and of course the relpages and reltuples counts are correct (or at
1173 * least more so than the old values) regardless.
1176 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1179 * Make a copy of the tuple to update. Normally we use the syscache, but
1180 * we can't rely on that during bootstrap or while reindexing pg_class
1181 * itself.
1183 if (IsBootstrapProcessingMode() ||
1184 ReindexIsProcessingHeap(RelationRelationId))
1186 /* don't assume syscache will work */
1187 HeapScanDesc pg_class_scan;
1188 ScanKeyData key[1];
1190 ScanKeyInit(&key[0],
1191 ObjectIdAttributeNumber,
1192 BTEqualStrategyNumber, F_OIDEQ,
1193 ObjectIdGetDatum(relid));
1195 pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
1196 tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
1197 tuple = heap_copytuple(tuple);
1198 heap_endscan(pg_class_scan);
1200 else
1202 /* normal case, use syscache */
1203 tuple = SearchSysCacheCopy(RELOID,
1204 ObjectIdGetDatum(relid),
1205 0, 0, 0);
1208 if (!HeapTupleIsValid(tuple))
1209 elog(ERROR, "could not find tuple for relation %u", relid);
1210 rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1212 /* Apply required updates, if any, to copied tuple */
1214 dirty = false;
1215 if (rd_rel->relhasindex != hasindex)
1217 rd_rel->relhasindex = hasindex;
1218 dirty = true;
1220 if (isprimary)
1222 if (!rd_rel->relhaspkey)
1224 rd_rel->relhaspkey = true;
1225 dirty = true;
1228 if (OidIsValid(reltoastidxid))
1230 Assert(rd_rel->relkind == RELKIND_TOASTVALUE);
1231 if (rd_rel->reltoastidxid != reltoastidxid)
1233 rd_rel->reltoastidxid = reltoastidxid;
1234 dirty = true;
1237 if (rd_rel->reltuples != (float4) reltuples)
1239 rd_rel->reltuples = (float4) reltuples;
1240 dirty = true;
1242 if (rd_rel->relpages != (int32) relpages)
1244 rd_rel->relpages = (int32) relpages;
1245 dirty = true;
1249 * If anything changed, write out the tuple
1251 if (dirty)
1253 heap_inplace_update(pg_class, tuple);
1254 /* the above sends a cache inval message */
1256 else
1258 /* no need to change tuple, but force relcache inval anyway */
1259 CacheInvalidateRelcacheByTuple(tuple);
1262 heap_freetuple(tuple);
1264 heap_close(pg_class, RowExclusiveLock);
1268 * setNewRelfilenode - assign a new relfilenode value to the relation
1270 * Caller must already hold exclusive lock on the relation.
1272 * The relation is marked with relfrozenxid=freezeXid (InvalidTransactionId
1273 * must be passed for indexes)
1275 void
1276 setNewRelfilenode(Relation relation, TransactionId freezeXid)
1278 Oid newrelfilenode;
1279 RelFileNode newrnode;
1280 Relation pg_class;
1281 HeapTuple tuple;
1282 Form_pg_class rd_rel;
1284 /* Can't change relfilenode for nailed tables (indexes ok though) */
1285 Assert(!relation->rd_isnailed ||
1286 relation->rd_rel->relkind == RELKIND_INDEX);
1287 /* Can't change for shared tables or indexes */
1288 Assert(!relation->rd_rel->relisshared);
1289 /* Indexes must have Invalid frozenxid; other relations must not */
1290 Assert((relation->rd_rel->relkind == RELKIND_INDEX &&
1291 freezeXid == InvalidTransactionId) ||
1292 TransactionIdIsNormal(freezeXid));
1294 /* Allocate a new relfilenode */
1295 newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace,
1296 relation->rd_rel->relisshared,
1297 NULL);
1300 * Find the pg_class tuple for the given relation. This is not used
1301 * during bootstrap, so okay to use heap_update always.
1303 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1305 tuple = SearchSysCacheCopy(RELOID,
1306 ObjectIdGetDatum(RelationGetRelid(relation)),
1307 0, 0, 0);
1308 if (!HeapTupleIsValid(tuple))
1309 elog(ERROR, "could not find tuple for relation %u",
1310 RelationGetRelid(relation));
1311 rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1314 * ... and create storage for corresponding forks in the new relfilenode.
1316 * NOTE: any conflict in relfilenode value will be caught here
1318 newrnode = relation->rd_node;
1319 newrnode.relNode = newrelfilenode;
1322 * Create the main fork, like heap_create() does, and drop the old
1323 * storage.
1325 RelationCreateStorage(newrnode, relation->rd_istemp);
1326 smgrclosenode(newrnode);
1327 RelationDropStorage(relation);
1329 /* update the pg_class row */
1330 rd_rel->relfilenode = newrelfilenode;
1331 rd_rel->relpages = 0; /* it's empty until further notice */
1332 rd_rel->reltuples = 0;
1333 rd_rel->relfrozenxid = freezeXid;
1334 simple_heap_update(pg_class, &tuple->t_self, tuple);
1335 CatalogUpdateIndexes(pg_class, tuple);
1337 heap_freetuple(tuple);
1339 heap_close(pg_class, RowExclusiveLock);
1341 /* Make sure the relfilenode change is visible */
1342 CommandCounterIncrement();
1344 /* Mark the rel as having a new relfilenode in current transaction */
1345 RelationCacheMarkNewRelfilenode(relation);
1350 * index_build - invoke access-method-specific index build procedure
1352 * On entry, the index's catalog entries are valid, and its physical disk
1353 * file has been created but is empty. We call the AM-specific build
1354 * procedure to fill in the index contents. We then update the pg_class
1355 * entries of the index and heap relation as needed, using statistics
1356 * returned by ambuild as well as data passed by the caller.
1358 * Note: when reindexing an existing index, isprimary can be false;
1359 * the index is already properly marked and need not be re-marked.
1361 * Note: before Postgres 8.2, the passed-in heap and index Relations
1362 * were automatically closed by this routine. This is no longer the case.
1363 * The caller opened 'em, and the caller should close 'em.
1365 void
1366 index_build(Relation heapRelation,
1367 Relation indexRelation,
1368 IndexInfo *indexInfo,
1369 bool isprimary)
1371 RegProcedure procedure;
1372 IndexBuildResult *stats;
1373 Oid save_userid;
1374 bool save_secdefcxt;
1377 * sanity checks
1379 Assert(RelationIsValid(indexRelation));
1380 Assert(PointerIsValid(indexRelation->rd_am));
1382 procedure = indexRelation->rd_am->ambuild;
1383 Assert(RegProcedureIsValid(procedure));
1386 * Switch to the table owner's userid, so that any index functions are
1387 * run as that user.
1389 GetUserIdAndContext(&save_userid, &save_secdefcxt);
1390 SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
1393 * Call the access method's build procedure
1395 stats = (IndexBuildResult *)
1396 DatumGetPointer(OidFunctionCall3(procedure,
1397 PointerGetDatum(heapRelation),
1398 PointerGetDatum(indexRelation),
1399 PointerGetDatum(indexInfo)));
1400 Assert(PointerIsValid(stats));
1402 /* Restore userid */
1403 SetUserIdAndContext(save_userid, save_secdefcxt);
1406 * If we found any potentially broken HOT chains, mark the index as not
1407 * being usable until the current transaction is below the event horizon.
1408 * See src/backend/access/heap/README.HOT for discussion.
1410 if (indexInfo->ii_BrokenHotChain)
1412 Oid indexId = RelationGetRelid(indexRelation);
1413 Relation pg_index;
1414 HeapTuple indexTuple;
1415 Form_pg_index indexForm;
1417 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
1419 indexTuple = SearchSysCacheCopy(INDEXRELID,
1420 ObjectIdGetDatum(indexId),
1421 0, 0, 0);
1422 if (!HeapTupleIsValid(indexTuple))
1423 elog(ERROR, "cache lookup failed for index %u", indexId);
1424 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
1426 indexForm->indcheckxmin = true;
1427 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
1428 CatalogUpdateIndexes(pg_index, indexTuple);
1430 heap_freetuple(indexTuple);
1431 heap_close(pg_index, RowExclusiveLock);
1435 * Update heap and index pg_class rows
1437 index_update_stats(heapRelation,
1438 true,
1439 isprimary,
1440 (heapRelation->rd_rel->relkind == RELKIND_TOASTVALUE) ?
1441 RelationGetRelid(indexRelation) : InvalidOid,
1442 stats->heap_tuples);
1444 index_update_stats(indexRelation,
1445 false,
1446 false,
1447 InvalidOid,
1448 stats->index_tuples);
1450 /* Make the updated versions visible */
1451 CommandCounterIncrement();
1456 * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
1458 * This is called back from an access-method-specific index build procedure
1459 * after the AM has done whatever setup it needs. The parent heap relation
1460 * is scanned to find tuples that should be entered into the index. Each
1461 * such tuple is passed to the AM's callback routine, which does the right
1462 * things to add it to the new index. After we return, the AM's index
1463 * build procedure does whatever cleanup is needed; in particular, it should
1464 * close the heap and index relations.
1466 * The total count of heap tuples is returned. This is for updating pg_class
1467 * statistics. (It's annoying not to be able to do that here, but we can't
1468 * do it until after the relation is closed.) Note that the index AM itself
1469 * must keep track of the number of index tuples; we don't do so here because
1470 * the AM might reject some of the tuples for its own reasons, such as being
1471 * unable to store NULLs.
1473 * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
1474 * any potentially broken HOT chains. Currently, we set this if there are
1475 * any RECENTLY_DEAD entries in a HOT chain, without trying very hard to
1476 * detect whether they're really incompatible with the chain tip.
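/*
 * Illustrative sketch (editorial annotation, not part of the original file):
 * an index AM's ambuild routine typically drives this function along the
 * lines of
 *
 *		reltuples = IndexBuildHeapScan(heapRelation, indexRelation, indexInfo,
 *									   true, my_build_callback,
 *									   (void *) &buildstate);
 *
 * where my_build_callback and buildstate are hypothetical names for the AM's
 * per-tuple callback and its private build state.
 */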
1478 double
1479 IndexBuildHeapScan(Relation heapRelation,
1480 Relation indexRelation,
1481 IndexInfo *indexInfo,
1482 bool allow_sync,
1483 IndexBuildCallback callback,
1484 void *callback_state)
1486 HeapScanDesc scan;
1487 HeapTuple heapTuple;
1488 Datum values[INDEX_MAX_KEYS];
1489 bool isnull[INDEX_MAX_KEYS];
1490 double reltuples;
1491 List *predicate;
1492 TupleTableSlot *slot;
1493 EState *estate;
1494 ExprContext *econtext;
1495 Snapshot snapshot;
1496 TransactionId OldestXmin;
1497 BlockNumber root_blkno = InvalidBlockNumber;
1498 OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1501 * sanity checks
1503 Assert(OidIsValid(indexRelation->rd_rel->relam));
1506 * Need an EState for evaluation of index expressions and partial-index
1507 * predicates. Also a slot to hold the current tuple.
1509 estate = CreateExecutorState();
1510 econtext = GetPerTupleExprContext(estate);
1511 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
1513 /* Arrange for econtext's scan tuple to be the tuple under test */
1514 econtext->ecxt_scantuple = slot;
1516 /* Set up execution state for predicate, if any. */
1517 predicate = (List *)
1518 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
1519 estate);
1522 * Prepare for scan of the base relation. In a normal index build, we use
1523 * SnapshotAny because we must retrieve all tuples and do our own time
1524 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1525 * concurrent build, we take a regular MVCC snapshot and index whatever's
1526 * live according to that. During bootstrap we just use SnapshotNow.
1528 if (IsBootstrapProcessingMode())
1530 snapshot = SnapshotNow;
1531 OldestXmin = InvalidTransactionId; /* not used */
1533 else if (indexInfo->ii_Concurrent)
1535 snapshot = RegisterSnapshot(GetTransactionSnapshot());
1536 OldestXmin = InvalidTransactionId; /* not used */
1538 else
1540 snapshot = SnapshotAny;
1541 /* okay to ignore lazy VACUUMs here */
1542 OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
1545 scan = heap_beginscan_strat(heapRelation, /* relation */
1546 snapshot, /* snapshot */
1547 0, /* number of keys */
1548 NULL, /* scan key */
1549 true, /* buffer access strategy OK */
1550 allow_sync); /* syncscan OK? */
1552 reltuples = 0;
1555 * Scan all tuples in the base relation.
1557 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1559 bool tupleIsAlive;
1561 CHECK_FOR_INTERRUPTS();
1564 * When dealing with a HOT-chain of updated tuples, we want to index
1565 * the values of the live tuple (if any), but index it under the TID
1566 * of the chain's root tuple. This approach is necessary to preserve
1567 * the HOT-chain structure in the heap. So we need to be able to find
1568 * the root item offset for every tuple that's in a HOT-chain. When
1569 * first reaching a new page of the relation, call
1570 * heap_get_root_tuples() to build a map of root item offsets on the
1571 * page.
1573 * It might look unsafe to use this information across buffer
1574 * lock/unlock. However, we hold ShareLock on the table so no
1575 * ordinary insert/update/delete should occur; and we hold pin on the
1576 * buffer continuously while visiting the page, so no pruning
1577 * operation can occur either.
1579 * Note the implied assumption that there is no more than one live
1580 * tuple per HOT-chain ...
1582 if (scan->rs_cblock != root_blkno)
1584 Page page = BufferGetPage(scan->rs_cbuf);
1586 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1587 heap_get_root_tuples(page, root_offsets);
1588 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1590 root_blkno = scan->rs_cblock;
1593 if (snapshot == SnapshotAny)
1595 /* do our own time qual check */
1596 bool indexIt;
1598 recheck:
1601 * We could possibly get away with not locking the buffer here,
1602 * since caller should hold ShareLock on the relation, but let's
1603 * be conservative about it. (This remark is still correct even
1604 * with HOT-pruning: our pin on the buffer prevents pruning.)
1606 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1608 switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin,
1609 scan->rs_cbuf))
1611 case HEAPTUPLE_DEAD:
1612 /* Definitely dead, we can ignore it */
1613 indexIt = false;
1614 tupleIsAlive = false;
1615 break;
1616 case HEAPTUPLE_LIVE:
1617 /* Normal case, index and unique-check it */
1618 indexIt = true;
1619 tupleIsAlive = true;
1620 break;
1621 case HEAPTUPLE_RECENTLY_DEAD:
1624 * If tuple is recently deleted then we must index it
1625 * anyway to preserve MVCC semantics. (Pre-existing
1626 * transactions could try to use the index after we finish
1627 * building it, and may need to see such tuples.)
1629 * However, if it was HOT-updated then we must only index
1630 * the live tuple at the end of the HOT-chain. Since this
1631 * breaks semantics for pre-existing snapshots, mark the
1632 * index as unusable for them.
1634 * If we've already decided that the index will be unsafe
1635 * for old snapshots, we may as well stop indexing
1636 * recently-dead tuples, since there's no longer any
1637 * point.
1639 if (HeapTupleIsHotUpdated(heapTuple))
1641 indexIt = false;
1642 /* mark the index as unsafe for old snapshots */
1643 indexInfo->ii_BrokenHotChain = true;
1645 else if (indexInfo->ii_BrokenHotChain)
1646 indexIt = false;
1647 else
1648 indexIt = true;
1649 /* In any case, exclude the tuple from unique-checking */
1650 tupleIsAlive = false;
1651 break;
1652 case HEAPTUPLE_INSERT_IN_PROGRESS:
1655 * Since caller should hold ShareLock or better, we should
1656 * not see any tuples inserted by open transactions ---
1657 * unless it's our own transaction. (Consider INSERT
1658 * followed by CREATE INDEX within a transaction.) An
1659 * exception occurs when reindexing a system catalog,
1660 * because we often release lock on system catalogs before
1661 * committing. In that case we wait for the inserting
1662 * transaction to finish and check again. (We could do
1663 * that on user tables too, but since the case is not
1664 * expected it seems better to throw an error.)
1666 if (!TransactionIdIsCurrentTransactionId(
1667 HeapTupleHeaderGetXmin(heapTuple->t_data)))
1669 if (!IsSystemRelation(heapRelation))
1670 elog(ERROR, "concurrent insert in progress");
1671 else
1674 * Must drop the lock on the buffer before we wait
1676 TransactionId xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1678 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1679 XactLockTableWait(xwait);
1680 goto recheck;
1685 * We must index such tuples, since if the index build
1686 * commits then they're good.
1688 indexIt = true;
1689 tupleIsAlive = true;
1690 break;
1691 case HEAPTUPLE_DELETE_IN_PROGRESS:
1694 * Since caller should hold ShareLock or better, we should
1695 * not see any tuples deleted by open transactions ---
1696 * unless it's our own transaction. (Consider DELETE
1697 * followed by CREATE INDEX within a transaction.) An
1698 * exception occurs when reindexing a system catalog,
1699 * because we often release lock on system catalogs before
1700 * committing. In that case we wait for the deleting
1701 * transaction to finish and check again. (We could do
1702 * that on user tables too, but since the case is not
1703 * expected it seems better to throw an error.)
1705 Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
1706 if (!TransactionIdIsCurrentTransactionId(
1707 HeapTupleHeaderGetXmax(heapTuple->t_data)))
1709 if (!IsSystemRelation(heapRelation))
1710 elog(ERROR, "concurrent delete in progress");
1711 else
1714 * Must drop the lock on the buffer before we wait
1716 TransactionId xwait = HeapTupleHeaderGetXmax(heapTuple->t_data);
1718 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1719 XactLockTableWait(xwait);
1720 goto recheck;
1725 * Otherwise, we have to treat these tuples just like
1726 * RECENTLY_DEAD ones.
1728 if (HeapTupleIsHotUpdated(heapTuple))
1730 indexIt = false;
1731 /* mark the index as unsafe for old snapshots */
1732 indexInfo->ii_BrokenHotChain = true;
1734 else if (indexInfo->ii_BrokenHotChain)
1735 indexIt = false;
1736 else
1737 indexIt = true;
1738 /* In any case, exclude the tuple from unique-checking */
1739 tupleIsAlive = false;
1740 break;
1741 default:
1742 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1743 indexIt = tupleIsAlive = false; /* keep compiler quiet */
1744 break;
1747 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1749 if (!indexIt)
1750 continue;
1752 else
1754 /* heap_getnext did the time qual check */
1755 tupleIsAlive = true;
1758 reltuples += 1;
1760 MemoryContextReset(econtext->ecxt_per_tuple_memory);
1762 /* Set up for predicate or expression evaluation */
1763 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
1766 * In a partial index, discard tuples that don't satisfy the
1767 * predicate.
1769 if (predicate != NIL)
1771 if (!ExecQual(predicate, econtext, false))
1772 continue;
1776 * For the current heap tuple, extract all the attributes we use in
1777 * this index, and note which are null. This also performs evaluation
1778 * of any expressions needed.
1780 FormIndexDatum(indexInfo,
1781 slot,
1782 estate,
1783 values,
1784 isnull);
1787 * You'd think we should go ahead and build the index tuple here, but
1788 * some index AMs want to do further processing on the data first. So
1789 * pass the values[] and isnull[] arrays, instead.
1792 if (HeapTupleIsHeapOnly(heapTuple))
1795 * For a heap-only tuple, pretend its TID is that of the root. See
1796 * src/backend/access/heap/README.HOT for discussion.
1798 HeapTupleData rootTuple;
1799 OffsetNumber offnum;
1801 rootTuple = *heapTuple;
1802 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1804 Assert(OffsetNumberIsValid(root_offsets[offnum - 1]));
1806 ItemPointerSetOffsetNumber(&rootTuple.t_self,
1807 root_offsets[offnum - 1]);
1809 /* Call the AM's callback routine to process the tuple */
1810 callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
1811 callback_state);
1813 else
1815 /* Call the AM's callback routine to process the tuple */
1816 callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
1817 callback_state);
1821 heap_endscan(scan);
1823 /* we can now forget our snapshot, if set */
1824 if (indexInfo->ii_Concurrent)
1825 UnregisterSnapshot(snapshot);
1827 ExecDropSingleTupleTableSlot(slot);
1829 FreeExecutorState(estate);
1831 /* These may have been pointing to the now-gone estate */
1832 indexInfo->ii_ExpressionsState = NIL;
1833 indexInfo->ii_PredicateState = NIL;
1835 return reltuples;
1840 * validate_index - support code for concurrent index builds
1842 * We do a concurrent index build by first inserting the catalog entry for the
1843 * index via index_create(), marking it not indisready and not indisvalid.
1844 * Then we commit our transaction and start a new one, then we wait for all
1845 * transactions that could have been modifying the table to terminate. Now
1846 * we know that any subsequently-started transactions will see the index and
1847 * honor its constraints on HOT updates; so while existing HOT-chains might
1848 * be broken with respect to the index, no currently live tuple will have an
1849 * incompatible HOT update done to it. We now build the index normally via
1850 * index_build(), while holding a weak lock that allows concurrent
1851 * insert/update/delete. Also, we index only tuples that are valid
1852 * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
1853 * build takes care to include recently-dead tuples. This is OK because
1854 * we won't mark the index valid until all transactions that might be able
1855 * to see those tuples are gone. The reason for doing that is to avoid
1856 * bogus unique-index failures due to concurrent UPDATEs (we might see
1857 * different versions of the same row as being valid when we pass over them,
1858 * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
1859 * does not contain any tuples added to the table while we built the index.
1861 * Next, we mark the index "indisready" (but still not "indisvalid") and
1862 * commit the second transaction and start a third. Again we wait for all
1863 * transactions that could have been modifying the table to terminate. Now
1864 * we know that any subsequently-started transactions will see the index and
1865 * insert their new tuples into it. We then take a new reference snapshot
1866 * which is passed to validate_index(). Any tuples that are valid according
1867 * to this snap, but are not in the index, must be added to the index.
1868 * (Any tuples committed live after the snap will be inserted into the
1869 * index by their originating transaction. Any tuples committed dead before
1870 * the snap need not be indexed, because we will wait out all transactions
1871 * that might care about them before we mark the index valid.)
1873 * validate_index() works by first gathering all the TIDs currently in the
1874 * index, using a bulkdelete callback that just stores the TIDs and doesn't
1875 * ever say "delete it". (This should be faster than a plain indexscan;
1876 * also, not all index AMs support full-index indexscan.) Then we sort the
1877 * TIDs, and finally scan the table doing a "merge join" against the TID list
1878 * to see which tuples are missing from the index. Thus we will ensure that
1879 * all tuples valid according to the reference snapshot are in the index.
1881 * Building a unique index this way is tricky: we might try to insert a
1882 * tuple that is already dead or is in process of being deleted, and we
1883 * mustn't have a uniqueness failure against an updated version of the same
1884 * row. We could try to check the tuple to see if it's already dead and tell
1885 * index_insert() not to do the uniqueness check, but that still leaves us
1886 * with a race condition against an in-progress update. To handle that,
1887 * we expect the index AM to recheck liveness of the to-be-inserted tuple
1888 * before it declares a uniqueness error.
1890 * After completing validate_index(), we wait until all transactions that
1891 * were alive at the time of the reference snapshot are gone; this is
1892 * necessary to be sure there are none left with a serializable snapshot
1893 * older than the reference (and hence possibly able to see tuples we did
1894 * not index). Then we mark the index "indisvalid" and commit. Subsequent
1895 * transactions will be able to use it for queries.
1897 * Doing two full table scans is a brute-force strategy. We could try to be
1898 * cleverer, e.g. storing new tuples in a special area of the table (perhaps
1899 * making the table append-only by setting use_fsm). However, that would
1900 * add yet more locking issues.
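/*
 * For orientation, the procedure described above corresponds roughly to the
 * transaction-level sequence that CREATE INDEX CONCURRENTLY drives from
 * DefineIndex (a simplified restatement of the comment above, not
 * additional behavior):
 *
 *		xact 1: index_create() catalog entry, !indisready, !indisvalid; commit
 *		xact 2: wait out lockers; index_build(); set indisready; commit
 *		xact 3: wait out lockers; take reference snapshot; validate_index();
 *				wait out older snapshots; set indisvalid; commit
 */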
1902 void
1903 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
1905 Relation heapRelation,
1906 indexRelation;
1907 IndexInfo *indexInfo;
1908 IndexVacuumInfo ivinfo;
1909 v_i_state state;
1910 Oid save_userid;
1911 bool save_secdefcxt;
1913 /* Open and lock the parent heap relation */
1914 heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1915 /* And the target index relation */
1916 indexRelation = index_open(indexId, RowExclusiveLock);
1919 * Fetch info needed for index_insert. (You might think this should be
1920 * passed in from DefineIndex, but its copy is long gone due to having
1921 * been built in a previous transaction.)
1923 indexInfo = BuildIndexInfo(indexRelation);
1925 /* mark the build as concurrent, just for consistency */
1926 indexInfo->ii_Concurrent = true;
1929 * Switch to the table owner's userid, so that any index functions are
1930 * run as that user.
1932 GetUserIdAndContext(&save_userid, &save_secdefcxt);
1933 SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
1936 * Scan the index and gather up all the TIDs into a tuplesort object.
1938 ivinfo.index = indexRelation;
1939 ivinfo.vacuum_full = false;
1940 ivinfo.analyze_only = false;
1941 ivinfo.message_level = DEBUG2;
1942 ivinfo.num_heap_tuples = -1;
1943 ivinfo.strategy = NULL;
1945 state.tuplesort = tuplesort_begin_datum(TIDOID,
1946 TIDLessOperator, false,
1947 maintenance_work_mem,
1948 false);
1949 state.htups = state.itups = state.tups_inserted = 0;
1951 (void) index_bulk_delete(&ivinfo, NULL,
1952 validate_index_callback, (void *) &state);
1954 /* Execute the sort */
1955 tuplesort_performsort(state.tuplesort);
1958 * Now scan the heap and "merge" it with the index
1960 validate_index_heapscan(heapRelation,
1961 indexRelation,
1962 indexInfo,
1963 snapshot,
1964 &state);
1966 /* Done with tuplesort object */
1967 tuplesort_end(state.tuplesort);
1969 elog(DEBUG2,
1970 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
1971 state.htups, state.itups, state.tups_inserted);
1973 /* Restore userid */
1974 SetUserIdAndContext(save_userid, save_secdefcxt);
1976 /* Close rels, but keep locks */
1977 index_close(indexRelation, NoLock);
1978 heap_close(heapRelation, NoLock);
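/*
 * The userid switch above follows the usual save/switch/restore pattern;
 * in outline (a restatement of the calls in this function, with no new
 * behavior):
 *
 *		GetUserIdAndContext(&save_userid, &save_secdefcxt);
 *		SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
 *		... evaluate any index expressions as the table owner ...
 *		SetUserIdAndContext(save_userid, save_secdefcxt);
 */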
1982 * validate_index_callback - bulkdelete callback to collect the index TIDs
1984 static bool
1985 validate_index_callback(ItemPointer itemptr, void *opaque)
1987 v_i_state *state = (v_i_state *) opaque;
1989 tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
1990 state->itups += 1;
1991 return false; /* never actually delete anything */
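/*
 * In outline, the TID collection and drain pattern used by validate_index
 * and validate_index_heapscan looks like this (a sketch of the calls
 * already made above and below, not a separate code path):
 *
 *		ts = tuplesort_begin_datum(TIDOID, TIDLessOperator, false,
 *								   maintenance_work_mem, false);
 *		(void) index_bulk_delete(&ivinfo, NULL,
 *								 validate_index_callback, (void *) &state);
 *			... which calls tuplesort_putdatum(ts, PointerGetDatum(tid), false)
 *			... once per index entry
 *		tuplesort_performsort(ts);
 *		while (tuplesort_getdatum(ts, true, &val, &isnull))
 *			... consume the TIDs in (block, offset) order ...
 *		tuplesort_end(ts);
 */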
1995 * validate_index_heapscan - second table scan for concurrent index build
1997 * This has much code in common with IndexBuildHeapScan, but it's different
1998 * enough that it seems cleaner to have two routines rather than one.
2000 static void
2001 validate_index_heapscan(Relation heapRelation,
2002 Relation indexRelation,
2003 IndexInfo *indexInfo,
2004 Snapshot snapshot,
2005 v_i_state *state)
2007 HeapScanDesc scan;
2008 HeapTuple heapTuple;
2009 Datum values[INDEX_MAX_KEYS];
2010 bool isnull[INDEX_MAX_KEYS];
2011 List *predicate;
2012 TupleTableSlot *slot;
2013 EState *estate;
2014 ExprContext *econtext;
2015 BlockNumber root_blkno = InvalidBlockNumber;
2016 OffsetNumber root_offsets[MaxHeapTuplesPerPage];
2017 bool in_index[MaxHeapTuplesPerPage];
2019 /* state variables for the merge */
2020 ItemPointer indexcursor = NULL;
2021 bool tuplesort_empty = false;
2024 * sanity checks
2026 Assert(OidIsValid(indexRelation->rd_rel->relam));
2029 * Need an EState for evaluation of index expressions and partial-index
2030 * predicates. Also a slot to hold the current tuple.
2032 estate = CreateExecutorState();
2033 econtext = GetPerTupleExprContext(estate);
2034 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2036 /* Arrange for econtext's scan tuple to be the tuple under test */
2037 econtext->ecxt_scantuple = slot;
2039 /* Set up execution state for predicate, if any. */
2040 predicate = (List *)
2041 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2042 estate);
2045 * Prepare for scan of the base relation. We need just those tuples
2046 * satisfying the passed-in reference snapshot. We must disable syncscan
2047 * here, because it's critical that we read from block zero forward to
2048 * match the sorted TIDs.
2050 scan = heap_beginscan_strat(heapRelation, /* relation */
2051 snapshot, /* snapshot */
2052 0, /* number of keys */
2053 NULL, /* scan key */
2054 true, /* buffer access strategy OK */
2055 false); /* syncscan not OK */
2058 * Scan all tuples matching the snapshot.
2060 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2062 ItemPointer heapcursor = &heapTuple->t_self;
2063 ItemPointerData rootTuple;
2064 OffsetNumber root_offnum;
2066 CHECK_FOR_INTERRUPTS();
2068 state->htups += 1;
2071 * As commented in IndexBuildHeapScan, we should index heap-only
2072 * tuples under the TIDs of their root tuples; so when we advance onto
2073 * a new heap page, build a map of root item offsets on the page.
2075 * This complicates merging against the tuplesort output: we will
2076 * visit the live tuples in order by their offsets, but the root
2077 * offsets that we need to compare against the index contents might be
2078 * ordered differently. So we might have to "look back" within the
2079 * tuplesort output, but only within the current page. We handle that
2080 * by keeping a bool array in_index[] showing all the
2081 * already-passed-over tuplesort output TIDs of the current page. We
2082 * clear that array here, when advancing onto a new heap page. (A worked example of this merge follows the function.)
2084 if (scan->rs_cblock != root_blkno)
2086 Page page = BufferGetPage(scan->rs_cbuf);
2088 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2089 heap_get_root_tuples(page, root_offsets);
2090 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2092 memset(in_index, 0, sizeof(in_index));
2094 root_blkno = scan->rs_cblock;
2097 /* Convert actual tuple TID to root TID */
2098 rootTuple = *heapcursor;
2099 root_offnum = ItemPointerGetOffsetNumber(heapcursor);
2101 if (HeapTupleIsHeapOnly(heapTuple))
2103 root_offnum = root_offsets[root_offnum - 1];
2104 Assert(OffsetNumberIsValid(root_offnum));
2105 ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
2109 * "merge" by skipping through the index tuples until we find or pass
2110 * the current root tuple.
2112 while (!tuplesort_empty &&
2113 (!indexcursor ||
2114 ItemPointerCompare(indexcursor, &rootTuple) < 0))
2116 Datum ts_val;
2117 bool ts_isnull;
2119 if (indexcursor)
2122 * Remember index items seen earlier on the current heap page
2124 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
2125 in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
2126 pfree(indexcursor);
2129 tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
2130 &ts_val, &ts_isnull);
2131 Assert(tuplesort_empty || !ts_isnull);
2132 indexcursor = (ItemPointer) DatumGetPointer(ts_val);
2136 * If the tuplesort has overshot *and* we didn't see a match earlier,
2137 * then this tuple is missing from the index, so insert it.
2139 if ((tuplesort_empty ||
2140 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
2141 !in_index[root_offnum - 1])
2143 MemoryContextReset(econtext->ecxt_per_tuple_memory);
2145 /* Set up for predicate or expression evaluation */
2146 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2149 * In a partial index, discard tuples that don't satisfy the
2150 * predicate.
2152 if (predicate != NIL)
2154 if (!ExecQual(predicate, econtext, false))
2155 continue;
2159 * For the current heap tuple, extract all the attributes we use
2160 * in this index, and note which are null. This also performs
2161 * evaluation of any expressions needed.
2163 FormIndexDatum(indexInfo,
2164 slot,
2165 estate,
2166 values,
2167 isnull);
2170 * You'd think we should go ahead and build the index tuple here,
2171 * but some index AMs want to do further processing on the data
2172 * first. So pass the values[] and isnull[] arrays, instead.
2176 * If the tuple is already committed dead, you might think we
2177 * could suppress uniqueness checking, but this is no longer true
2178 * in the presence of HOT, because the insert is actually a proxy
2179 * for a uniqueness check on the whole HOT-chain. That is, the
2180 * tuple we have here could be dead because it was already
2181 * HOT-updated, and if so the updating transaction will not have
2182 * thought it should insert index entries. The index AM will
2183 * check the whole HOT-chain and correctly detect a conflict if
2184 * there is one.
2187 index_insert(indexRelation,
2188 values,
2189 isnull,
2190 &rootTuple,
2191 heapRelation,
2192 indexInfo->ii_Unique);
2194 state->tups_inserted += 1;
2198 heap_endscan(scan);
2200 ExecDropSingleTupleTableSlot(slot);
2202 FreeExecutorState(estate);
2204 /* These may have been pointing to the now-gone estate */
2205 indexInfo->ii_ExpressionsState = NIL;
2206 indexInfo->ii_PredicateState = NIL;
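/*
 * A worked example of the merge above (all TIDs are hypothetical).  Suppose
 * the sorted index TIDs are (3,1) and (3,2), and block 3 of the heap holds
 * live tuples at offsets 1 and 4 (ordinary tuples) plus offset 5, a
 * heap-only tuple whose root line pointer is offset 2.  The heap scan then
 * yields root TIDs in the order (3,1), (3,4), (3,2):
 *
 *		(3,1)	the index cursor advances to (3,1) and has not overshot,
 *				so the tuple is already indexed; nothing is inserted.
 *		(3,4)	the cursor skips past (3,1) and (3,2), recording both in
 *				in_index[], and the tuplesort is exhausted; in_index[4 - 1]
 *				is false, so the tuple is missing and index_insert() adds
 *				it under TID (3,4).
 *		(3,2)	the tuplesort is empty, but in_index[2 - 1] was set while
 *				skipping, so the tuple is recognized as already indexed and
 *				is not inserted again.
 *
 * This only restates the behavior of the loop above for one concrete page.
 */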
2211 * IndexGetRelation: given an index's relation OID, get the OID of the
2212 * relation it is an index on. Uses the system cache.
2214 static Oid
2215 IndexGetRelation(Oid indexId)
2217 HeapTuple tuple;
2218 Form_pg_index index;
2219 Oid result;
2221 tuple = SearchSysCache(INDEXRELID,
2222 ObjectIdGetDatum(indexId),
2223 0, 0, 0);
2224 if (!HeapTupleIsValid(tuple))
2225 elog(ERROR, "cache lookup failed for index %u", indexId);
2226 index = (Form_pg_index) GETSTRUCT(tuple);
2227 Assert(index->indexrelid == indexId);
2229 result = index->indrelid;
2230 ReleaseSysCache(tuple);
2231 return result;
2235 * reindex_index - This routine is used to recreate a single index
2237 void
2238 reindex_index(Oid indexId)
2240 Relation iRel,
2241 heapRelation,
2242 pg_index;
2243 Oid heapId;
2244 bool inplace;
2245 IndexInfo *indexInfo;
2246 HeapTuple indexTuple;
2247 Form_pg_index indexForm;
2250 * Open and lock the parent heap relation. ShareLock is sufficient since
2251 * we only need to be sure no schema or data changes are going on.
2253 heapId = IndexGetRelation(indexId);
2254 heapRelation = heap_open(heapId, ShareLock);
2257 * Open the target index relation and get an exclusive lock on it, to
2258 * ensure that no one else is touching this particular index.
2260 iRel = index_open(indexId, AccessExclusiveLock);
2263 * Don't allow reindex on temp tables of other backends ... their local
2264 * buffer manager is not going to cope.
2266 if (RELATION_IS_OTHER_TEMP(iRel))
2267 ereport(ERROR,
2268 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2269 errmsg("cannot reindex temporary tables of other sessions")));
2272 * Also check for active uses of the index in the current transaction;
2273 * we don't want to reindex underneath an open indexscan.
2275 CheckTableNotInUse(iRel, "REINDEX INDEX");
2278 * If it's a shared index, we must do inplace processing (because we have
2279 * no way to update relfilenode in other databases). Otherwise we can do
2280 * it the normal transaction-safe way.
2282 * Since inplace processing isn't crash-safe, we only allow it in a
2283 * standalone backend. (In the REINDEX TABLE and REINDEX DATABASE cases,
2284 * the caller should have detected this.)
2286 inplace = iRel->rd_rel->relisshared;
2288 if (inplace && IsUnderPostmaster)
2289 ereport(ERROR,
2290 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2291 errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
2292 RelationGetRelationName(iRel))));
2294 PG_TRY();
2296 /* Suppress use of the target index while rebuilding it */
2297 SetReindexProcessing(heapId, indexId);
2299 /* Fetch info needed for index_build */
2300 indexInfo = BuildIndexInfo(iRel);
2302 if (inplace)
2305 * Truncate the actual file (and discard buffers).
2307 RelationTruncate(iRel, 0);
2309 else
2312 * We'll build a new physical relation for the index.
2314 setNewRelfilenode(iRel, InvalidTransactionId);
2317 /* Initialize the index and rebuild */
2318 /* Note: we do not need to re-establish pkey setting */
2319 index_build(heapRelation, iRel, indexInfo, false);
2321 PG_CATCH();
2323 /* Make sure flag gets cleared on error exit */
2324 ResetReindexProcessing();
2325 PG_RE_THROW();
2327 PG_END_TRY();
2328 ResetReindexProcessing();
2331 * If the index is marked invalid or not ready (ie, it's from a failed
2332 * CREATE INDEX CONCURRENTLY), we can now mark it valid. This allows
2333 * REINDEX to be used to clean up in such cases.
2335 * We can also reset indcheckxmin, because we have now done a
2336 * non-concurrent index build, *except* in the case where index_build
2337 * found some still-broken HOT chains.
2339 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
2341 indexTuple = SearchSysCacheCopy(INDEXRELID,
2342 ObjectIdGetDatum(indexId),
2343 0, 0, 0);
2344 if (!HeapTupleIsValid(indexTuple))
2345 elog(ERROR, "cache lookup failed for index %u", indexId);
2346 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2348 if (!indexForm->indisvalid || !indexForm->indisready ||
2349 (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
2351 indexForm->indisvalid = true;
2352 indexForm->indisready = true;
2353 if (!indexInfo->ii_BrokenHotChain)
2354 indexForm->indcheckxmin = false;
2355 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
2356 CatalogUpdateIndexes(pg_index, indexTuple);
2358 heap_close(pg_index, RowExclusiveLock);
2360 /* Close rels, but keep locks */
2361 index_close(iRel, NoLock);
2362 heap_close(heapRelation, NoLock);
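/*
 * At the SQL level this code is reached by, for example (index name
 * hypothetical):
 *
 *		REINDEX INDEX failed_concurrent_idx;
 *
 * which rebuilds the index non-concurrently and, per the comment above,
 * also clears any invalid/not-ready state left behind by a failed
 * CREATE INDEX CONCURRENTLY.
 */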
2366 * reindex_relation - This routine is used to recreate all indexes
2367 * of a relation (and optionally its toast relation too, if any).
2369 * Returns true if any indexes were rebuilt. Note that a
2370 * CommandCounterIncrement will occur after each index rebuild.
2372 bool
2373 reindex_relation(Oid relid, bool toast_too)
2375 Relation rel;
2376 Oid toast_relid;
2377 bool is_pg_class;
2378 bool result;
2379 List *indexIds,
2380 *doneIndexes;
2381 ListCell *indexId;
2384 * Open and lock the relation. ShareLock is sufficient since we only need
2385 * to prevent schema and data changes in it.
2387 rel = heap_open(relid, ShareLock);
2389 toast_relid = rel->rd_rel->reltoastrelid;
2392 * Get the list of index OIDs for this relation. (We trust to the
2393 * relcache to get this with a sequential scan if ignoring system
2394 * indexes.)
2396 indexIds = RelationGetIndexList(rel);
2399 * reindex_index will attempt to update the pg_class rows for the relation
2400 * and index. If we are processing pg_class itself, we want to make sure
2401 * that the updates do not try to insert index entries into indexes we
2402 * have not processed yet. (When we are trying to recover from corrupted
2403 * indexes, that could easily cause a crash.) We can accomplish this
2404 * because CatalogUpdateIndexes will use the relcache's index list to know
2405 * which indexes to update. We just force the index list to be only the
2406 * stuff we've processed.
2408 * It is okay to not insert entries into the indexes we have not processed
2409 * yet because all of this is transaction-safe. If we fail partway
2410 * through, the updated rows are dead and it doesn't matter whether they
2411 * have index entries. Also, a new pg_class index will be created with an
2412 * entry for its own pg_class row because we do setNewRelfilenode() before
2413 * we do index_build().
2415 * Note that we also clear pg_class's rd_oidindex until the loop is done,
2416 * so that that index can't be accessed either. This means we cannot
2417 * safely generate new relation OIDs while in the loop; shouldn't be a
2418 * problem.
2420 is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
2422 /* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
2423 if (is_pg_class)
2424 (void) RelationGetIndexAttrBitmap(rel);
2426 /* Reindex all the indexes. */
2427 doneIndexes = NIL;
2428 foreach(indexId, indexIds)
2430 Oid indexOid = lfirst_oid(indexId);
2432 if (is_pg_class)
2433 RelationSetIndexList(rel, doneIndexes, InvalidOid);
2435 reindex_index(indexOid);
2437 CommandCounterIncrement();
2439 if (is_pg_class)
2440 doneIndexes = lappend_oid(doneIndexes, indexOid);
2443 if (is_pg_class)
2444 RelationSetIndexList(rel, indexIds, ClassOidIndexId);
2447 * Close rel, but continue to hold the lock.
2449 heap_close(rel, NoLock);
2451 result = (indexIds != NIL);
2454 * If the relation has a secondary toast rel, reindex that too while we
2455 * still hold the lock on the master table.
2457 if (toast_too && OidIsValid(toast_relid))
2458 result |= reindex_relation(toast_relid, false);
2460 return result;
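/*
 * A caller-level sketch (the variable name is hypothetical; real callers
 * include the REINDEX TABLE and REINDEX DATABASE code paths):
 *
 *		if (reindex_relation(tableOid, true))
 *			ereport(NOTICE, (errmsg("some indexes were rebuilt")));
 *
 * Passing toast_too = true makes the function recurse once onto the
 * relation's toast table, as in the call just above.
 */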