Make nbtree split REDO locking match original execution.
[pgsql.git] / src / backend / catalog / index.c
blob1be27eec52e6e1150f8d29d01d0941f416dd7516
1 /*-------------------------------------------------------------------------
3 * index.c
4 * code to create and destroy POSTGRES index relations
6 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * src/backend/catalog/index.c
14 * INTERFACE ROUTINES
15 * index_create() - Create a cataloged index relation
16 * index_drop() - Removes index relation from catalogs
17 * BuildIndexInfo() - Prepare to insert index tuples
18 * FormIndexDatum() - Construct datum vector for one index tuple
20 *-------------------------------------------------------------------------
22 #include "postgres.h"
24 #include <unistd.h>
26 #include "access/amapi.h"
27 #include "access/heapam.h"
28 #include "access/multixact.h"
29 #include "access/reloptions.h"
30 #include "access/relscan.h"
31 #include "access/sysattr.h"
32 #include "access/tableam.h"
33 #include "access/transam.h"
34 #include "access/visibilitymap.h"
35 #include "access/xact.h"
36 #include "bootstrap/bootstrap.h"
37 #include "catalog/binary_upgrade.h"
38 #include "catalog/catalog.h"
39 #include "catalog/dependency.h"
40 #include "catalog/heap.h"
41 #include "catalog/index.h"
42 #include "catalog/objectaccess.h"
43 #include "catalog/partition.h"
44 #include "catalog/pg_am.h"
45 #include "catalog/pg_collation.h"
46 #include "catalog/pg_constraint.h"
47 #include "catalog/pg_depend.h"
48 #include "catalog/pg_description.h"
49 #include "catalog/pg_inherits.h"
50 #include "catalog/pg_opclass.h"
51 #include "catalog/pg_operator.h"
52 #include "catalog/pg_tablespace.h"
53 #include "catalog/pg_trigger.h"
54 #include "catalog/pg_type.h"
55 #include "catalog/storage.h"
56 #include "commands/event_trigger.h"
57 #include "commands/progress.h"
58 #include "commands/tablecmds.h"
59 #include "commands/trigger.h"
60 #include "executor/executor.h"
61 #include "miscadmin.h"
62 #include "nodes/makefuncs.h"
63 #include "nodes/nodeFuncs.h"
64 #include "optimizer/optimizer.h"
65 #include "parser/parser.h"
66 #include "pgstat.h"
67 #include "rewrite/rewriteManip.h"
68 #include "storage/bufmgr.h"
69 #include "storage/lmgr.h"
70 #include "storage/predicate.h"
71 #include "storage/procarray.h"
72 #include "storage/smgr.h"
73 #include "utils/builtins.h"
74 #include "utils/fmgroids.h"
75 #include "utils/guc.h"
76 #include "utils/inval.h"
77 #include "utils/lsyscache.h"
78 #include "utils/memutils.h"
79 #include "utils/pg_rusage.h"
80 #include "utils/snapmgr.h"
81 #include "utils/syscache.h"
82 #include "utils/tuplesort.h"
84 /* Potentially set by pg_upgrade_support functions */
85 Oid binary_upgrade_next_index_pg_class_oid = InvalidOid;
88 * Pointer-free representation of variables used when reindexing system
89 * catalogs; we use this to propagate those values to parallel workers.
91 typedef struct
93 Oid currentlyReindexedHeap;
94 Oid currentlyReindexedIndex;
95 int numPendingReindexedIndexes;
96 Oid pendingReindexedIndexes[FLEXIBLE_ARRAY_MEMBER];
97 } SerializedReindexState;
99 /* non-export function prototypes */
100 static bool relationHasPrimaryKey(Relation rel);
101 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
102 IndexInfo *indexInfo,
103 List *indexColNames,
104 Oid accessMethodObjectId,
105 Oid *collationObjectId,
106 Oid *classObjectId);
107 static void InitializeAttributeOids(Relation indexRelation,
108 int numatts, Oid indexoid);
109 static void AppendAttributeTuples(Relation indexRelation, Datum *attopts);
110 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
111 Oid parentIndexId,
112 IndexInfo *indexInfo,
113 Oid *collationOids,
114 Oid *classOids,
115 int16 *coloptions,
116 bool primary,
117 bool isexclusion,
118 bool immediate,
119 bool isvalid,
120 bool isready);
121 static void index_update_stats(Relation rel,
122 bool hasindex,
123 double reltuples);
124 static void IndexCheckExclusion(Relation heapRelation,
125 Relation indexRelation,
126 IndexInfo *indexInfo);
127 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
128 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
129 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
130 static void ResetReindexProcessing(void);
131 static void SetReindexPending(List *indexes);
132 static void RemoveReindexPending(Oid indexOid);
136 * relationHasPrimaryKey
137 * See whether an existing relation has a primary key.
139 * Caller must have suitable lock on the relation.
141 * Note: we intentionally do not check indisvalid here; that's because this
142 * is used to enforce the rule that there can be only one indisprimary index,
143 * and we want that to be true even if said index is invalid.
145 static bool
146 relationHasPrimaryKey(Relation rel)
148 bool result = false;
149 List *indexoidlist;
150 ListCell *indexoidscan;
153 * Get the list of index OIDs for the table from the relcache, and look up
154 * each one in the pg_index syscache until we find one marked primary key
155 * (hopefully there isn't more than one such).
157 indexoidlist = RelationGetIndexList(rel);
159 foreach(indexoidscan, indexoidlist)
161 Oid indexoid = lfirst_oid(indexoidscan);
162 HeapTuple indexTuple;
164 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
165 if (!HeapTupleIsValid(indexTuple)) /* should not happen */
166 elog(ERROR, "cache lookup failed for index %u", indexoid);
167 result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
168 ReleaseSysCache(indexTuple);
169 if (result)
170 break;
173 list_free(indexoidlist);
175 return result;
179 * index_check_primary_key
180 * Apply special checks needed before creating a PRIMARY KEY index
182 * This processing used to be in DefineIndex(), but has been split out
183 * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
185 * We check for a pre-existing primary key, and that all columns of the index
186 * are simple column references (not expressions), and that all those
187 * columns are marked NOT NULL. If not, fail.
189 * We used to automatically change unmarked columns to NOT NULL here by doing
190 * our own local ALTER TABLE command. But that doesn't work well if we're
191 * executing one subcommand of an ALTER TABLE: the operations may not get
192 * performed in the right order overall. Now we expect that the parser
193 * inserted any required ALTER TABLE SET NOT NULL operations before trying
194 * to create a primary-key index.
196 * Caller had better have at least ShareLock on the table, else the not-null
197 * checking isn't trustworthy.
199 void
200 index_check_primary_key(Relation heapRel,
201 IndexInfo *indexInfo,
202 bool is_alter_table,
203 IndexStmt *stmt)
205 int i;
208 * If ALTER TABLE or CREATE TABLE .. PARTITION OF, check that there isn't
209 * already a PRIMARY KEY. In CREATE TABLE for an ordinary relation, we
210 * have faith that the parser rejected multiple pkey clauses; and CREATE
211 * INDEX doesn't have a way to say PRIMARY KEY, so it's no problem either.
213 if ((is_alter_table || heapRel->rd_rel->relispartition) &&
214 relationHasPrimaryKey(heapRel))
216 ereport(ERROR,
217 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
218 errmsg("multiple primary keys for table \"%s\" are not allowed",
219 RelationGetRelationName(heapRel))));
223 * Check that all of the attributes in a primary key are marked as not
224 * null. (We don't really expect to see that; it'd mean the parser messed
225 * up. But it seems wise to check anyway.)
227 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
229 AttrNumber attnum = indexInfo->ii_IndexAttrNumbers[i];
230 HeapTuple atttuple;
231 Form_pg_attribute attform;
233 if (attnum == 0)
234 ereport(ERROR,
235 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
236 errmsg("primary keys cannot be expressions")));
238 /* System attributes are never null, so no need to check */
239 if (attnum < 0)
240 continue;
242 atttuple = SearchSysCache2(ATTNUM,
243 ObjectIdGetDatum(RelationGetRelid(heapRel)),
244 Int16GetDatum(attnum));
245 if (!HeapTupleIsValid(atttuple))
246 elog(ERROR, "cache lookup failed for attribute %d of relation %u",
247 attnum, RelationGetRelid(heapRel));
248 attform = (Form_pg_attribute) GETSTRUCT(atttuple);
250 if (!attform->attnotnull)
251 ereport(ERROR,
252 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
253 errmsg("primary key column \"%s\" is not marked NOT NULL",
254 NameStr(attform->attname))));
256 ReleaseSysCache(atttuple);
261 * ConstructTupleDescriptor
263 * Build an index tuple descriptor for a new index
265 static TupleDesc
266 ConstructTupleDescriptor(Relation heapRelation,
267 IndexInfo *indexInfo,
268 List *indexColNames,
269 Oid accessMethodObjectId,
270 Oid *collationObjectId,
271 Oid *classObjectId)
273 int numatts = indexInfo->ii_NumIndexAttrs;
274 int numkeyatts = indexInfo->ii_NumIndexKeyAttrs;
275 ListCell *colnames_item = list_head(indexColNames);
276 ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
277 IndexAmRoutine *amroutine;
278 TupleDesc heapTupDesc;
279 TupleDesc indexTupDesc;
280 int natts; /* #atts in heap rel --- for error checks */
281 int i;
283 /* We need access to the index AM's API struct */
284 amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);
286 /* ... and to the table's tuple descriptor */
287 heapTupDesc = RelationGetDescr(heapRelation);
288 natts = RelationGetForm(heapRelation)->relnatts;
291 * allocate the new tuple descriptor
293 indexTupDesc = CreateTemplateTupleDesc(numatts);
296 * Fill in the pg_attribute row.
298 for (i = 0; i < numatts; i++)
300 AttrNumber atnum = indexInfo->ii_IndexAttrNumbers[i];
301 Form_pg_attribute to = TupleDescAttr(indexTupDesc, i);
302 HeapTuple tuple;
303 Form_pg_type typeTup;
304 Form_pg_opclass opclassTup;
305 Oid keyType;
307 MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
308 to->attnum = i + 1;
309 to->attstattarget = -1;
310 to->attcacheoff = -1;
311 to->attislocal = true;
312 to->attcollation = (i < numkeyatts) ?
313 collationObjectId[i] : InvalidOid;
316 * Set the attribute name as specified by caller.
318 if (colnames_item == NULL) /* shouldn't happen */
319 elog(ERROR, "too few entries in colnames list");
320 namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
321 colnames_item = lnext(indexColNames, colnames_item);
324 * For simple index columns, we copy some pg_attribute fields from the
325 * parent relation. For expressions we have to look at the expression
326 * result.
328 if (atnum != 0)
330 /* Simple index column */
331 const FormData_pg_attribute *from;
333 Assert(atnum > 0); /* should've been caught above */
335 if (atnum > natts) /* safety check */
336 elog(ERROR, "invalid column number %d", atnum);
337 from = TupleDescAttr(heapTupDesc,
338 AttrNumberGetAttrOffset(atnum));
340 to->atttypid = from->atttypid;
341 to->attlen = from->attlen;
342 to->attndims = from->attndims;
343 to->atttypmod = from->atttypmod;
344 to->attbyval = from->attbyval;
345 to->attstorage = from->attstorage;
346 to->attalign = from->attalign;
348 else
350 /* Expressional index */
351 Node *indexkey;
353 if (indexpr_item == NULL) /* shouldn't happen */
354 elog(ERROR, "too few entries in indexprs list");
355 indexkey = (Node *) lfirst(indexpr_item);
356 indexpr_item = lnext(indexInfo->ii_Expressions, indexpr_item);
359 * Lookup the expression type in pg_type for the type length etc.
361 keyType = exprType(indexkey);
362 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
363 if (!HeapTupleIsValid(tuple))
364 elog(ERROR, "cache lookup failed for type %u", keyType);
365 typeTup = (Form_pg_type) GETSTRUCT(tuple);
368 * Assign some of the attributes values. Leave the rest.
370 to->atttypid = keyType;
371 to->attlen = typeTup->typlen;
372 to->attbyval = typeTup->typbyval;
373 to->attstorage = typeTup->typstorage;
374 to->attalign = typeTup->typalign;
375 to->atttypmod = exprTypmod(indexkey);
377 ReleaseSysCache(tuple);
380 * Make sure the expression yields a type that's safe to store in
381 * an index. We need this defense because we have index opclasses
382 * for pseudo-types such as "record", and the actually stored type
383 * had better be safe; eg, a named composite type is okay, an
384 * anonymous record type is not. The test is the same as for
385 * whether a table column is of a safe type (which is why we
386 * needn't check for the non-expression case).
388 CheckAttributeType(NameStr(to->attname),
389 to->atttypid, to->attcollation,
390 NIL, 0);
394 * We do not yet have the correct relation OID for the index, so just
395 * set it invalid for now. InitializeAttributeOids() will fix it
396 * later.
398 to->attrelid = InvalidOid;
401 * Check the opclass and index AM to see if either provides a keytype
402 * (overriding the attribute type). Opclass (if exists) takes
403 * precedence.
405 keyType = amroutine->amkeytype;
407 if (i < indexInfo->ii_NumIndexKeyAttrs)
409 tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
410 if (!HeapTupleIsValid(tuple))
411 elog(ERROR, "cache lookup failed for opclass %u",
412 classObjectId[i]);
413 opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
414 if (OidIsValid(opclassTup->opckeytype))
415 keyType = opclassTup->opckeytype;
418 * If keytype is specified as ANYELEMENT, and opcintype is
419 * ANYARRAY, then the attribute type must be an array (else it'd
420 * not have matched this opclass); use its element type.
422 * We could also allow ANYCOMPATIBLE/ANYCOMPATIBLEARRAY here, but
423 * there seems no need to do so; there's no reason to declare an
424 * opclass as taking ANYCOMPATIBLEARRAY rather than ANYARRAY.
426 if (keyType == ANYELEMENTOID && opclassTup->opcintype == ANYARRAYOID)
428 keyType = get_base_element_type(to->atttypid);
429 if (!OidIsValid(keyType))
430 elog(ERROR, "could not get element type of array type %u",
431 to->atttypid);
434 ReleaseSysCache(tuple);
438 * If a key type different from the heap value is specified, update
439 * the type-related fields in the index tupdesc.
441 if (OidIsValid(keyType) && keyType != to->atttypid)
443 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
444 if (!HeapTupleIsValid(tuple))
445 elog(ERROR, "cache lookup failed for type %u", keyType);
446 typeTup = (Form_pg_type) GETSTRUCT(tuple);
448 to->atttypid = keyType;
449 to->atttypmod = -1;
450 to->attlen = typeTup->typlen;
451 to->attbyval = typeTup->typbyval;
452 to->attalign = typeTup->typalign;
453 to->attstorage = typeTup->typstorage;
455 ReleaseSysCache(tuple);
459 pfree(amroutine);
461 return indexTupDesc;
464 /* ----------------------------------------------------------------
465 * InitializeAttributeOids
466 * ----------------------------------------------------------------
468 static void
469 InitializeAttributeOids(Relation indexRelation,
470 int numatts,
471 Oid indexoid)
473 TupleDesc tupleDescriptor;
474 int i;
476 tupleDescriptor = RelationGetDescr(indexRelation);
478 for (i = 0; i < numatts; i += 1)
479 TupleDescAttr(tupleDescriptor, i)->attrelid = indexoid;
482 /* ----------------------------------------------------------------
483 * AppendAttributeTuples
484 * ----------------------------------------------------------------
486 static void
487 AppendAttributeTuples(Relation indexRelation, Datum *attopts)
489 Relation pg_attribute;
490 CatalogIndexState indstate;
491 TupleDesc indexTupDesc;
494 * open the attribute relation and its indexes
496 pg_attribute = table_open(AttributeRelationId, RowExclusiveLock);
498 indstate = CatalogOpenIndexes(pg_attribute);
501 * insert data from new index's tupdesc into pg_attribute
503 indexTupDesc = RelationGetDescr(indexRelation);
505 InsertPgAttributeTuples(pg_attribute, indexTupDesc, InvalidOid, attopts, indstate);
507 CatalogCloseIndexes(indstate);
509 table_close(pg_attribute, RowExclusiveLock);
512 /* ----------------------------------------------------------------
513 * UpdateIndexRelation
515 * Construct and insert a new entry in the pg_index catalog
516 * ----------------------------------------------------------------
518 static void
519 UpdateIndexRelation(Oid indexoid,
520 Oid heapoid,
521 Oid parentIndexId,
522 IndexInfo *indexInfo,
523 Oid *collationOids,
524 Oid *classOids,
525 int16 *coloptions,
526 bool primary,
527 bool isexclusion,
528 bool immediate,
529 bool isvalid,
530 bool isready)
532 int2vector *indkey;
533 oidvector *indcollation;
534 oidvector *indclass;
535 int2vector *indoption;
536 Datum exprsDatum;
537 Datum predDatum;
538 Datum values[Natts_pg_index];
539 bool nulls[Natts_pg_index];
540 Relation pg_index;
541 HeapTuple tuple;
542 int i;
545 * Copy the index key, opclass, and indoption info into arrays (should we
546 * make the caller pass them like this to start with?)
548 indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
549 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
550 indkey->values[i] = indexInfo->ii_IndexAttrNumbers[i];
551 indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexKeyAttrs);
552 indclass = buildoidvector(classOids, indexInfo->ii_NumIndexKeyAttrs);
553 indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexKeyAttrs);
556 * Convert the index expressions (if any) to a text datum
558 if (indexInfo->ii_Expressions != NIL)
560 char *exprsString;
562 exprsString = nodeToString(indexInfo->ii_Expressions);
563 exprsDatum = CStringGetTextDatum(exprsString);
564 pfree(exprsString);
566 else
567 exprsDatum = (Datum) 0;
570 * Convert the index predicate (if any) to a text datum. Note we convert
571 * implicit-AND format to normal explicit-AND for storage.
573 if (indexInfo->ii_Predicate != NIL)
575 char *predString;
577 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
578 predDatum = CStringGetTextDatum(predString);
579 pfree(predString);
581 else
582 predDatum = (Datum) 0;
586 * open the system catalog index relation
588 pg_index = table_open(IndexRelationId, RowExclusiveLock);
591 * Build a pg_index tuple
593 MemSet(nulls, false, sizeof(nulls));
595 values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
596 values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
597 values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
598 values[Anum_pg_index_indnkeyatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexKeyAttrs);
599 values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
600 values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
601 values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
602 values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
603 values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
604 values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
605 values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
606 values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready);
607 values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
608 values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
609 values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
610 values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
611 values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
612 values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
613 values[Anum_pg_index_indexprs - 1] = exprsDatum;
614 if (exprsDatum == (Datum) 0)
615 nulls[Anum_pg_index_indexprs - 1] = true;
616 values[Anum_pg_index_indpred - 1] = predDatum;
617 if (predDatum == (Datum) 0)
618 nulls[Anum_pg_index_indpred - 1] = true;
620 tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
623 * insert the tuple into the pg_index catalog
625 CatalogTupleInsert(pg_index, tuple);
628 * close the relation and free the tuple
630 table_close(pg_index, RowExclusiveLock);
631 heap_freetuple(tuple);
636 * index_create
638 * heapRelation: table to build index on (suitably locked by caller)
639 * indexRelationName: what it says
640 * indexRelationId: normally, pass InvalidOid to let this routine
641 * generate an OID for the index. During bootstrap this may be
642 * nonzero to specify a preselected OID.
643 * parentIndexRelid: if creating an index partition, the OID of the
644 * parent index; otherwise InvalidOid.
645 * parentConstraintId: if creating a constraint on a partition, the OID
646 * of the constraint in the parent; otherwise InvalidOid.
647 * relFileNode: normally, pass InvalidOid to get new storage. May be
648 * nonzero to attach an existing valid build.
649 * indexInfo: same info executor uses to insert into the index
650 * indexColNames: column names to use for index (List of char *)
651 * accessMethodObjectId: OID of index AM to use
652 * tableSpaceId: OID of tablespace to use
653 * collationObjectId: array of collation OIDs, one per index column
654 * classObjectId: array of index opclass OIDs, one per index column
655 * coloptions: array of per-index-column indoption settings
656 * reloptions: AM-specific options
657 * flags: bitmask that can include any combination of these bits:
658 * INDEX_CREATE_IS_PRIMARY
659 * the index is a primary key
660 * INDEX_CREATE_ADD_CONSTRAINT:
661 * invoke index_constraint_create also
662 * INDEX_CREATE_SKIP_BUILD:
663 * skip the index_build() step for the moment; caller must do it
664 * later (typically via reindex_index())
665 * INDEX_CREATE_CONCURRENT:
666 * do not lock the table against writers. The index will be
667 * marked "invalid" and the caller must take additional steps
668 * to fix it up.
669 * INDEX_CREATE_IF_NOT_EXISTS:
670 * do not throw an error if a relation with the same name
671 * already exists.
672 * INDEX_CREATE_PARTITIONED:
673 * create a partitioned index (table must be partitioned)
674 * constr_flags: flags passed to index_constraint_create
675 * (only if INDEX_CREATE_ADD_CONSTRAINT is set)
676 * allow_system_table_mods: allow table to be a system catalog
677 * is_internal: if true, post creation hook for new index
678 * constraintId: if not NULL, receives OID of created constraint
680 * Returns the OID of the created index.
683 index_create(Relation heapRelation,
684 const char *indexRelationName,
685 Oid indexRelationId,
686 Oid parentIndexRelid,
687 Oid parentConstraintId,
688 Oid relFileNode,
689 IndexInfo *indexInfo,
690 List *indexColNames,
691 Oid accessMethodObjectId,
692 Oid tableSpaceId,
693 Oid *collationObjectId,
694 Oid *classObjectId,
695 int16 *coloptions,
696 Datum reloptions,
697 bits16 flags,
698 bits16 constr_flags,
699 bool allow_system_table_mods,
700 bool is_internal,
701 Oid *constraintId)
703 Oid heapRelationId = RelationGetRelid(heapRelation);
704 Relation pg_class;
705 Relation indexRelation;
706 TupleDesc indexTupDesc;
707 bool shared_relation;
708 bool mapped_relation;
709 bool is_exclusion;
710 Oid namespaceId;
711 int i;
712 char relpersistence;
713 bool isprimary = (flags & INDEX_CREATE_IS_PRIMARY) != 0;
714 bool invalid = (flags & INDEX_CREATE_INVALID) != 0;
715 bool concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0;
716 bool partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0;
717 char relkind;
718 TransactionId relfrozenxid;
719 MultiXactId relminmxid;
721 /* constraint flags can only be set when a constraint is requested */
722 Assert((constr_flags == 0) ||
723 ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0));
724 /* partitioned indexes must never be "built" by themselves */
725 Assert(!partitioned || (flags & INDEX_CREATE_SKIP_BUILD));
727 relkind = partitioned ? RELKIND_PARTITIONED_INDEX : RELKIND_INDEX;
728 is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
730 pg_class = table_open(RelationRelationId, RowExclusiveLock);
733 * The index will be in the same namespace as its parent table, and is
734 * shared across databases if and only if the parent is. Likewise, it
735 * will use the relfilenode map if and only if the parent does; and it
736 * inherits the parent's relpersistence.
738 namespaceId = RelationGetNamespace(heapRelation);
739 shared_relation = heapRelation->rd_rel->relisshared;
740 mapped_relation = RelationIsMapped(heapRelation);
741 relpersistence = heapRelation->rd_rel->relpersistence;
744 * check parameters
746 if (indexInfo->ii_NumIndexAttrs < 1)
747 elog(ERROR, "must index at least one column");
749 if (!allow_system_table_mods &&
750 IsSystemRelation(heapRelation) &&
751 IsNormalProcessingMode())
752 ereport(ERROR,
753 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
754 errmsg("user-defined indexes on system catalog tables are not supported")));
757 * Btree text_pattern_ops uses text_eq as the equality operator, which is
758 * fine as long as the collation is deterministic; text_eq then reduces to
759 * bitwise equality and so it is semantically compatible with the other
760 * operators and functions in that opclass. But with a nondeterministic
761 * collation, text_eq could yield results that are incompatible with the
762 * actual behavior of the index (which is determined by the opclass's
763 * comparison function). We prevent such problems by refusing creation of
764 * an index with that opclass and a nondeterministic collation.
766 * The same applies to varchar_pattern_ops and bpchar_pattern_ops. If we
767 * find more cases, we might decide to create a real mechanism for marking
768 * opclasses as incompatible with nondeterminism; but for now, this small
769 * hack suffices.
771 * Another solution is to use a special operator, not text_eq, as the
772 * equality opclass member; but that is undesirable because it would
773 * prevent index usage in many queries that work fine today.
775 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
777 Oid collation = collationObjectId[i];
778 Oid opclass = classObjectId[i];
780 if (collation)
782 if ((opclass == TEXT_BTREE_PATTERN_OPS_OID ||
783 opclass == VARCHAR_BTREE_PATTERN_OPS_OID ||
784 opclass == BPCHAR_BTREE_PATTERN_OPS_OID) &&
785 !get_collation_isdeterministic(collation))
787 HeapTuple classtup;
789 classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
790 if (!HeapTupleIsValid(classtup))
791 elog(ERROR, "cache lookup failed for operator class %u", opclass);
792 ereport(ERROR,
793 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
794 errmsg("nondeterministic collations are not supported for operator class \"%s\"",
795 NameStr(((Form_pg_opclass) GETSTRUCT(classtup))->opcname))));
796 ReleaseSysCache(classtup);
802 * Concurrent index build on a system catalog is unsafe because we tend to
803 * release locks before committing in catalogs.
805 if (concurrent &&
806 IsCatalogRelation(heapRelation))
807 ereport(ERROR,
808 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
809 errmsg("concurrent index creation on system catalog tables is not supported")));
812 * This case is currently not supported. There's no way to ask for it in
813 * the grammar with CREATE INDEX, but it can happen with REINDEX.
815 if (concurrent && is_exclusion)
816 ereport(ERROR,
817 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
818 errmsg("concurrent index creation for exclusion constraints is not supported")));
821 * We cannot allow indexing a shared relation after initdb (because
822 * there's no way to make the entry in other databases' pg_class).
824 if (shared_relation && !IsBootstrapProcessingMode())
825 ereport(ERROR,
826 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
827 errmsg("shared indexes cannot be created after initdb")));
830 * Shared relations must be in pg_global, too (last-ditch check)
832 if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
833 elog(ERROR, "shared relations must be placed in pg_global tablespace");
836 * Check for duplicate name (both as to the index, and as to the
837 * associated constraint if any). Such cases would fail on the relevant
838 * catalogs' unique indexes anyway, but we prefer to give a friendlier
839 * error message.
841 if (get_relname_relid(indexRelationName, namespaceId))
843 if ((flags & INDEX_CREATE_IF_NOT_EXISTS) != 0)
845 ereport(NOTICE,
846 (errcode(ERRCODE_DUPLICATE_TABLE),
847 errmsg("relation \"%s\" already exists, skipping",
848 indexRelationName)));
849 table_close(pg_class, RowExclusiveLock);
850 return InvalidOid;
853 ereport(ERROR,
854 (errcode(ERRCODE_DUPLICATE_TABLE),
855 errmsg("relation \"%s\" already exists",
856 indexRelationName)));
859 if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0 &&
860 ConstraintNameIsUsed(CONSTRAINT_RELATION, heapRelationId,
861 indexRelationName))
864 * INDEX_CREATE_IF_NOT_EXISTS does not apply here, since the
865 * conflicting constraint is not an index.
867 ereport(ERROR,
868 (errcode(ERRCODE_DUPLICATE_OBJECT),
869 errmsg("constraint \"%s\" for relation \"%s\" already exists",
870 indexRelationName, RelationGetRelationName(heapRelation))));
874 * construct tuple descriptor for index tuples
876 indexTupDesc = ConstructTupleDescriptor(heapRelation,
877 indexInfo,
878 indexColNames,
879 accessMethodObjectId,
880 collationObjectId,
881 classObjectId);
884 * Allocate an OID for the index, unless we were told what to use.
886 * The OID will be the relfilenode as well, so make sure it doesn't
887 * collide with either pg_class OIDs or existing physical files.
889 if (!OidIsValid(indexRelationId))
891 /* Use binary-upgrade override for pg_class.oid/relfilenode? */
892 if (IsBinaryUpgrade)
894 if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
895 ereport(ERROR,
896 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
897 errmsg("pg_class index OID value not set when in binary upgrade mode")));
899 indexRelationId = binary_upgrade_next_index_pg_class_oid;
900 binary_upgrade_next_index_pg_class_oid = InvalidOid;
902 else
904 indexRelationId =
905 GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
910 * create the index relation's relcache entry and, if necessary, the
911 * physical disk file. (If we fail further down, it's the smgr's
912 * responsibility to remove the disk file again, if any.)
914 indexRelation = heap_create(indexRelationName,
915 namespaceId,
916 tableSpaceId,
917 indexRelationId,
918 relFileNode,
919 accessMethodObjectId,
920 indexTupDesc,
921 relkind,
922 relpersistence,
923 shared_relation,
924 mapped_relation,
925 allow_system_table_mods,
926 &relfrozenxid,
927 &relminmxid);
929 Assert(relfrozenxid == InvalidTransactionId);
930 Assert(relminmxid == InvalidMultiXactId);
931 Assert(indexRelationId == RelationGetRelid(indexRelation));
934 * Obtain exclusive lock on it. Although no other transactions can see it
935 * until we commit, this prevents deadlock-risk complaints from lock
936 * manager in cases such as CLUSTER.
938 LockRelation(indexRelation, AccessExclusiveLock);
941 * Fill in fields of the index's pg_class entry that are not set correctly
942 * by heap_create.
944 * XXX should have a cleaner way to create cataloged indexes
946 indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
947 indexRelation->rd_rel->relam = accessMethodObjectId;
948 indexRelation->rd_rel->relispartition = OidIsValid(parentIndexRelid);
951 * store index's pg_class entry
953 InsertPgClassTuple(pg_class, indexRelation,
954 RelationGetRelid(indexRelation),
955 (Datum) 0,
956 reloptions);
958 /* done with pg_class */
959 table_close(pg_class, RowExclusiveLock);
962 * now update the object id's of all the attribute tuple forms in the
963 * index relation's tuple descriptor
965 InitializeAttributeOids(indexRelation,
966 indexInfo->ii_NumIndexAttrs,
967 indexRelationId);
970 * append ATTRIBUTE tuples for the index
972 AppendAttributeTuples(indexRelation, indexInfo->ii_OpclassOptions);
974 /* ----------------
975 * update pg_index
976 * (append INDEX tuple)
978 * Note that this stows away a representation of "predicate".
979 * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
980 * ----------------
982 UpdateIndexRelation(indexRelationId, heapRelationId, parentIndexRelid,
983 indexInfo,
984 collationObjectId, classObjectId, coloptions,
985 isprimary, is_exclusion,
986 (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) == 0,
987 !concurrent && !invalid,
988 !concurrent);
991 * Register relcache invalidation on the indexes' heap relation, to
992 * maintain consistency of its index list
994 CacheInvalidateRelcache(heapRelation);
996 /* update pg_inherits and the parent's relhassubclass, if needed */
997 if (OidIsValid(parentIndexRelid))
999 StoreSingleInheritance(indexRelationId, parentIndexRelid, 1);
1000 SetRelationHasSubclass(parentIndexRelid, true);
1004 * Register constraint and dependencies for the index.
1006 * If the index is from a CONSTRAINT clause, construct a pg_constraint
1007 * entry. The index will be linked to the constraint, which in turn is
1008 * linked to the table. If it's not a CONSTRAINT, we need to make a
1009 * dependency directly on the table.
1011 * We don't need a dependency on the namespace, because there'll be an
1012 * indirect dependency via our parent table.
1014 * During bootstrap we can't register any dependencies, and we don't try
1015 * to make a constraint either.
1017 if (!IsBootstrapProcessingMode())
1019 ObjectAddress myself,
1020 referenced;
1022 ObjectAddressSet(myself, RelationRelationId, indexRelationId);
1024 if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0)
1026 char constraintType;
1027 ObjectAddress localaddr;
1029 if (isprimary)
1030 constraintType = CONSTRAINT_PRIMARY;
1031 else if (indexInfo->ii_Unique)
1032 constraintType = CONSTRAINT_UNIQUE;
1033 else if (is_exclusion)
1034 constraintType = CONSTRAINT_EXCLUSION;
1035 else
1037 elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
1038 constraintType = 0; /* keep compiler quiet */
1041 localaddr = index_constraint_create(heapRelation,
1042 indexRelationId,
1043 parentConstraintId,
1044 indexInfo,
1045 indexRelationName,
1046 constraintType,
1047 constr_flags,
1048 allow_system_table_mods,
1049 is_internal);
1050 if (constraintId)
1051 *constraintId = localaddr.objectId;
1053 else
1055 bool have_simple_col = false;
1057 /* Create auto dependencies on simply-referenced columns */
1058 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1060 if (indexInfo->ii_IndexAttrNumbers[i] != 0)
1062 ObjectAddressSubSet(referenced, RelationRelationId,
1063 heapRelationId,
1064 indexInfo->ii_IndexAttrNumbers[i]);
1065 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
1066 have_simple_col = true;
1071 * If there are no simply-referenced columns, give the index an
1072 * auto dependency on the whole table. In most cases, this will
1073 * be redundant, but it might not be if the index expressions and
1074 * predicate contain no Vars or only whole-row Vars.
1076 if (!have_simple_col)
1078 ObjectAddressSet(referenced, RelationRelationId,
1079 heapRelationId);
1080 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
1085 * If this is an index partition, create partition dependencies on
1086 * both the parent index and the table. (Note: these must be *in
1087 * addition to*, not instead of, all other dependencies. Otherwise
1088 * we'll be short some dependencies after DETACH PARTITION.)
1090 if (OidIsValid(parentIndexRelid))
1092 ObjectAddressSet(referenced, RelationRelationId, parentIndexRelid);
1093 recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI);
1095 ObjectAddressSet(referenced, RelationRelationId, heapRelationId);
1096 recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC);
1099 /* Store dependency on collations */
1100 /* The default collation is pinned, so don't bother recording it */
1101 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1103 if (OidIsValid(collationObjectId[i]) &&
1104 collationObjectId[i] != DEFAULT_COLLATION_OID)
1106 ObjectAddressSet(referenced, CollationRelationId,
1107 collationObjectId[i]);
1108 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1112 /* Store dependency on operator classes */
1113 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1115 ObjectAddressSet(referenced, OperatorClassRelationId, classObjectId[i]);
1116 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1119 /* Store dependencies on anything mentioned in index expressions */
1120 if (indexInfo->ii_Expressions)
1122 recordDependencyOnSingleRelExpr(&myself,
1123 (Node *) indexInfo->ii_Expressions,
1124 heapRelationId,
1125 DEPENDENCY_NORMAL,
1126 DEPENDENCY_AUTO, false);
1129 /* Store dependencies on anything mentioned in predicate */
1130 if (indexInfo->ii_Predicate)
1132 recordDependencyOnSingleRelExpr(&myself,
1133 (Node *) indexInfo->ii_Predicate,
1134 heapRelationId,
1135 DEPENDENCY_NORMAL,
1136 DEPENDENCY_AUTO, false);
1139 else
1141 /* Bootstrap mode - assert we weren't asked for constraint support */
1142 Assert((flags & INDEX_CREATE_ADD_CONSTRAINT) == 0);
1145 /* Post creation hook for new index */
1146 InvokeObjectPostCreateHookArg(RelationRelationId,
1147 indexRelationId, 0, is_internal);
1150 * Advance the command counter so that we can see the newly-entered
1151 * catalog tuples for the index.
1153 CommandCounterIncrement();
1156 * In bootstrap mode, we have to fill in the index strategy structure with
1157 * information from the catalogs. If we aren't bootstrapping, then the
1158 * relcache entry has already been rebuilt thanks to sinval update during
1159 * CommandCounterIncrement.
1161 if (IsBootstrapProcessingMode())
1162 RelationInitIndexAccessInfo(indexRelation);
1163 else
1164 Assert(indexRelation->rd_indexcxt != NULL);
1166 indexRelation->rd_index->indnkeyatts = indexInfo->ii_NumIndexKeyAttrs;
1168 /* Validate opclass-specific options */
1169 if (indexInfo->ii_OpclassOptions)
1170 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1171 (void) index_opclass_options(indexRelation, i + 1,
1172 indexInfo->ii_OpclassOptions[i],
1173 true);
1176 * If this is bootstrap (initdb) time, then we don't actually fill in the
1177 * index yet. We'll be creating more indexes and classes later, so we
1178 * delay filling them in until just before we're done with bootstrapping.
1179 * Similarly, if the caller specified to skip the build then filling the
1180 * index is delayed till later (ALTER TABLE can save work in some cases
1181 * with this). Otherwise, we call the AM routine that constructs the
1182 * index.
1184 if (IsBootstrapProcessingMode())
1186 index_register(heapRelationId, indexRelationId, indexInfo);
1188 else if ((flags & INDEX_CREATE_SKIP_BUILD) != 0)
1191 * Caller is responsible for filling the index later on. However,
1192 * we'd better make sure that the heap relation is correctly marked as
1193 * having an index.
1195 index_update_stats(heapRelation,
1196 true,
1197 -1.0);
1198 /* Make the above update visible */
1199 CommandCounterIncrement();
1201 else
1203 index_build(heapRelation, indexRelation, indexInfo, false, true);
1207 * Close the index; but we keep the lock that we acquired above until end
1208 * of transaction. Closing the heap is caller's responsibility.
1210 index_close(indexRelation, NoLock);
1212 return indexRelationId;
1216 * index_concurrently_create_copy
1218 * Create concurrently an index based on the definition of the one provided by
1219 * caller. The index is inserted into catalogs and needs to be built later
1220 * on. This is called during concurrent reindex processing.
1223 index_concurrently_create_copy(Relation heapRelation, Oid oldIndexId, const char *newName)
1225 Relation indexRelation;
1226 IndexInfo *oldInfo,
1227 *newInfo;
1228 Oid newIndexId = InvalidOid;
1229 HeapTuple indexTuple,
1230 classTuple;
1231 Datum indclassDatum,
1232 colOptionDatum,
1233 optionDatum;
1234 oidvector *indclass;
1235 int2vector *indcoloptions;
1236 bool isnull;
1237 List *indexColNames = NIL;
1238 List *indexExprs = NIL;
1239 List *indexPreds = NIL;
1241 indexRelation = index_open(oldIndexId, RowExclusiveLock);
1243 /* The new index needs some information from the old index */
1244 oldInfo = BuildIndexInfo(indexRelation);
1247 * Concurrent build of an index with exclusion constraints is not
1248 * supported.
1250 if (oldInfo->ii_ExclusionOps != NULL)
1251 ereport(ERROR,
1252 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1253 errmsg("concurrent index creation for exclusion constraints is not supported")));
1255 /* Get the array of class and column options IDs from index info */
1256 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldIndexId));
1257 if (!HeapTupleIsValid(indexTuple))
1258 elog(ERROR, "cache lookup failed for index %u", oldIndexId);
1259 indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1260 Anum_pg_index_indclass, &isnull);
1261 Assert(!isnull);
1262 indclass = (oidvector *) DatumGetPointer(indclassDatum);
1264 colOptionDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1265 Anum_pg_index_indoption, &isnull);
1266 Assert(!isnull);
1267 indcoloptions = (int2vector *) DatumGetPointer(colOptionDatum);
1269 /* Fetch options of index if any */
1270 classTuple = SearchSysCache1(RELOID, oldIndexId);
1271 if (!HeapTupleIsValid(classTuple))
1272 elog(ERROR, "cache lookup failed for relation %u", oldIndexId);
1273 optionDatum = SysCacheGetAttr(RELOID, classTuple,
1274 Anum_pg_class_reloptions, &isnull);
1277 * Fetch the list of expressions and predicates directly from the
1278 * catalogs. This cannot rely on the information from IndexInfo of the
1279 * old index as these have been flattened for the planner.
1281 if (oldInfo->ii_Expressions != NIL)
1283 Datum exprDatum;
1284 char *exprString;
1286 exprDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1287 Anum_pg_index_indexprs, &isnull);
1288 Assert(!isnull);
1289 exprString = TextDatumGetCString(exprDatum);
1290 indexExprs = (List *) stringToNode(exprString);
1291 pfree(exprString);
1293 if (oldInfo->ii_Predicate != NIL)
1295 Datum predDatum;
1296 char *predString;
1298 predDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1299 Anum_pg_index_indpred, &isnull);
1300 Assert(!isnull);
1301 predString = TextDatumGetCString(predDatum);
1302 indexPreds = (List *) stringToNode(predString);
1304 /* Also convert to implicit-AND format */
1305 indexPreds = make_ands_implicit((Expr *) indexPreds);
1306 pfree(predString);
1310 * Build the index information for the new index. Note that rebuild of
1311 * indexes with exclusion constraints is not supported, hence there is no
1312 * need to fill all the ii_Exclusion* fields.
1314 newInfo = makeIndexInfo(oldInfo->ii_NumIndexAttrs,
1315 oldInfo->ii_NumIndexKeyAttrs,
1316 oldInfo->ii_Am,
1317 indexExprs,
1318 indexPreds,
1319 oldInfo->ii_Unique,
1320 false, /* not ready for inserts */
1321 true);
1324 * Extract the list of column names and the column numbers for the new
1325 * index information. All this information will be used for the index
1326 * creation.
1328 for (int i = 0; i < oldInfo->ii_NumIndexAttrs; i++)
1330 TupleDesc indexTupDesc = RelationGetDescr(indexRelation);
1331 Form_pg_attribute att = TupleDescAttr(indexTupDesc, i);
1333 indexColNames = lappend(indexColNames, NameStr(att->attname));
1334 newInfo->ii_IndexAttrNumbers[i] = oldInfo->ii_IndexAttrNumbers[i];
1338 * Now create the new index.
1340 * For a partition index, we adjust the partition dependency later, to
1341 * ensure a consistent state at all times. That is why parentIndexRelid
1342 * is not set here.
1344 newIndexId = index_create(heapRelation,
1345 newName,
1346 InvalidOid, /* indexRelationId */
1347 InvalidOid, /* parentIndexRelid */
1348 InvalidOid, /* parentConstraintId */
1349 InvalidOid, /* relFileNode */
1350 newInfo,
1351 indexColNames,
1352 indexRelation->rd_rel->relam,
1353 indexRelation->rd_rel->reltablespace,
1354 indexRelation->rd_indcollation,
1355 indclass->values,
1356 indcoloptions->values,
1357 optionDatum,
1358 INDEX_CREATE_SKIP_BUILD | INDEX_CREATE_CONCURRENT,
1360 true, /* allow table to be a system catalog? */
1361 false, /* is_internal? */
1362 NULL);
1364 /* Close the relations used and clean up */
1365 index_close(indexRelation, NoLock);
1366 ReleaseSysCache(indexTuple);
1367 ReleaseSysCache(classTuple);
1369 return newIndexId;
1373 * index_concurrently_build
1375 * Build index for a concurrent operation. Low-level locks are taken when
1376 * this operation is performed to prevent only schema changes, but they need
1377 * to be kept until the end of the transaction performing this operation.
1378 * 'indexOid' refers to an index relation OID already created as part of
1379 * previous processing, and 'heapOid' refers to its parent heap relation.
1381 void
1382 index_concurrently_build(Oid heapRelationId,
1383 Oid indexRelationId)
1385 Relation heapRel;
1386 Relation indexRelation;
1387 IndexInfo *indexInfo;
1389 /* This had better make sure that a snapshot is active */
1390 Assert(ActiveSnapshotSet());
1392 /* Open and lock the parent heap relation */
1393 heapRel = table_open(heapRelationId, ShareUpdateExclusiveLock);
1395 /* And the target index relation */
1396 indexRelation = index_open(indexRelationId, RowExclusiveLock);
1399 * We have to re-build the IndexInfo struct, since it was lost in the
1400 * commit of the transaction where this concurrent index was created at
1401 * the catalog level.
1403 indexInfo = BuildIndexInfo(indexRelation);
1404 Assert(!indexInfo->ii_ReadyForInserts);
1405 indexInfo->ii_Concurrent = true;
1406 indexInfo->ii_BrokenHotChain = false;
1408 /* Now build the index */
1409 index_build(heapRel, indexRelation, indexInfo, false, true);
1411 /* Close both the relations, but keep the locks */
1412 table_close(heapRel, NoLock);
1413 index_close(indexRelation, NoLock);
1416 * Update the pg_index row to mark the index as ready for inserts. Once we
1417 * commit this transaction, any new transactions that open the table must
1418 * insert new entries into the index for insertions and non-HOT updates.
1420 index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
1424 * index_concurrently_swap
1426 * Swap name, dependencies, and constraints of the old index over to the new
1427 * index, while marking the old index as invalid and the new as valid.
1429 void
1430 index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
1432 Relation pg_class,
1433 pg_index,
1434 pg_constraint,
1435 pg_trigger;
1436 Relation oldClassRel,
1437 newClassRel;
1438 HeapTuple oldClassTuple,
1439 newClassTuple;
1440 Form_pg_class oldClassForm,
1441 newClassForm;
1442 HeapTuple oldIndexTuple,
1443 newIndexTuple;
1444 Form_pg_index oldIndexForm,
1445 newIndexForm;
1446 bool isPartition;
1447 Oid indexConstraintOid;
1448 List *constraintOids = NIL;
1449 ListCell *lc;
1452 * Take a necessary lock on the old and new index before swapping them.
1454 oldClassRel = relation_open(oldIndexId, ShareUpdateExclusiveLock);
1455 newClassRel = relation_open(newIndexId, ShareUpdateExclusiveLock);
1457 /* Now swap names and dependencies of those indexes */
1458 pg_class = table_open(RelationRelationId, RowExclusiveLock);
1460 oldClassTuple = SearchSysCacheCopy1(RELOID,
1461 ObjectIdGetDatum(oldIndexId));
1462 if (!HeapTupleIsValid(oldClassTuple))
1463 elog(ERROR, "could not find tuple for relation %u", oldIndexId);
1464 newClassTuple = SearchSysCacheCopy1(RELOID,
1465 ObjectIdGetDatum(newIndexId));
1466 if (!HeapTupleIsValid(newClassTuple))
1467 elog(ERROR, "could not find tuple for relation %u", newIndexId);
1469 oldClassForm = (Form_pg_class) GETSTRUCT(oldClassTuple);
1470 newClassForm = (Form_pg_class) GETSTRUCT(newClassTuple);
1472 /* Swap the names */
1473 namestrcpy(&newClassForm->relname, NameStr(oldClassForm->relname));
1474 namestrcpy(&oldClassForm->relname, oldName);
1476 /* Swap the partition flags to track inheritance properly */
1477 isPartition = newClassForm->relispartition;
1478 newClassForm->relispartition = oldClassForm->relispartition;
1479 oldClassForm->relispartition = isPartition;
1481 CatalogTupleUpdate(pg_class, &oldClassTuple->t_self, oldClassTuple);
1482 CatalogTupleUpdate(pg_class, &newClassTuple->t_self, newClassTuple);
1484 heap_freetuple(oldClassTuple);
1485 heap_freetuple(newClassTuple);
1487 /* Now swap index info */
1488 pg_index = table_open(IndexRelationId, RowExclusiveLock);
1490 oldIndexTuple = SearchSysCacheCopy1(INDEXRELID,
1491 ObjectIdGetDatum(oldIndexId));
1492 if (!HeapTupleIsValid(oldIndexTuple))
1493 elog(ERROR, "could not find tuple for relation %u", oldIndexId);
1494 newIndexTuple = SearchSysCacheCopy1(INDEXRELID,
1495 ObjectIdGetDatum(newIndexId));
1496 if (!HeapTupleIsValid(newIndexTuple))
1497 elog(ERROR, "could not find tuple for relation %u", newIndexId);
1499 oldIndexForm = (Form_pg_index) GETSTRUCT(oldIndexTuple);
1500 newIndexForm = (Form_pg_index) GETSTRUCT(newIndexTuple);
1503 * Copy constraint flags from the old index. This is safe because the old
1504 * index guaranteed uniqueness.
1506 newIndexForm->indisprimary = oldIndexForm->indisprimary;
1507 oldIndexForm->indisprimary = false;
1508 newIndexForm->indisexclusion = oldIndexForm->indisexclusion;
1509 oldIndexForm->indisexclusion = false;
1510 newIndexForm->indimmediate = oldIndexForm->indimmediate;
1511 oldIndexForm->indimmediate = true;
1513 /* Preserve indisreplident in the new index */
1514 newIndexForm->indisreplident = oldIndexForm->indisreplident;
1515 oldIndexForm->indisreplident = false;
1517 /* Preserve indisclustered in the new index */
1518 newIndexForm->indisclustered = oldIndexForm->indisclustered;
1521 * Mark the new index as valid, and the old index as invalid similarly to
1522 * what index_set_state_flags() does.
1524 newIndexForm->indisvalid = true;
1525 oldIndexForm->indisvalid = false;
1526 oldIndexForm->indisclustered = false;
1528 CatalogTupleUpdate(pg_index, &oldIndexTuple->t_self, oldIndexTuple);
1529 CatalogTupleUpdate(pg_index, &newIndexTuple->t_self, newIndexTuple);
1531 heap_freetuple(oldIndexTuple);
1532 heap_freetuple(newIndexTuple);
1535 * Move constraints and triggers over to the new index
1538 constraintOids = get_index_ref_constraints(oldIndexId);
1540 indexConstraintOid = get_index_constraint(oldIndexId);
1542 if (OidIsValid(indexConstraintOid))
1543 constraintOids = lappend_oid(constraintOids, indexConstraintOid);
1545 pg_constraint = table_open(ConstraintRelationId, RowExclusiveLock);
1546 pg_trigger = table_open(TriggerRelationId, RowExclusiveLock);
1548 foreach(lc, constraintOids)
1550 HeapTuple constraintTuple,
1551 triggerTuple;
1552 Form_pg_constraint conForm;
1553 ScanKeyData key[1];
1554 SysScanDesc scan;
1555 Oid constraintOid = lfirst_oid(lc);
1557 /* Move the constraint from the old to the new index */
1558 constraintTuple = SearchSysCacheCopy1(CONSTROID,
1559 ObjectIdGetDatum(constraintOid));
1560 if (!HeapTupleIsValid(constraintTuple))
1561 elog(ERROR, "could not find tuple for constraint %u", constraintOid);
1563 conForm = ((Form_pg_constraint) GETSTRUCT(constraintTuple));
1565 if (conForm->conindid == oldIndexId)
1567 conForm->conindid = newIndexId;
1569 CatalogTupleUpdate(pg_constraint, &constraintTuple->t_self, constraintTuple);
1572 heap_freetuple(constraintTuple);
1574 /* Search for trigger records */
1575 ScanKeyInit(&key[0],
1576 Anum_pg_trigger_tgconstraint,
1577 BTEqualStrategyNumber, F_OIDEQ,
1578 ObjectIdGetDatum(constraintOid));
1580 scan = systable_beginscan(pg_trigger, TriggerConstraintIndexId, true,
1581 NULL, 1, key);
1583 while (HeapTupleIsValid((triggerTuple = systable_getnext(scan))))
1585 Form_pg_trigger tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple);
1587 if (tgForm->tgconstrindid != oldIndexId)
1588 continue;
1590 /* Make a modifiable copy */
1591 triggerTuple = heap_copytuple(triggerTuple);
1592 tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple);
1594 tgForm->tgconstrindid = newIndexId;
1596 CatalogTupleUpdate(pg_trigger, &triggerTuple->t_self, triggerTuple);
1598 heap_freetuple(triggerTuple);
1601 systable_endscan(scan);
1605 * Move comment if any
1608 Relation description;
1609 ScanKeyData skey[3];
1610 SysScanDesc sd;
1611 HeapTuple tuple;
1612 Datum values[Natts_pg_description] = {0};
1613 bool nulls[Natts_pg_description] = {0};
1614 bool replaces[Natts_pg_description] = {0};
1616 values[Anum_pg_description_objoid - 1] = ObjectIdGetDatum(newIndexId);
1617 replaces[Anum_pg_description_objoid - 1] = true;
1619 ScanKeyInit(&skey[0],
1620 Anum_pg_description_objoid,
1621 BTEqualStrategyNumber, F_OIDEQ,
1622 ObjectIdGetDatum(oldIndexId));
1623 ScanKeyInit(&skey[1],
1624 Anum_pg_description_classoid,
1625 BTEqualStrategyNumber, F_OIDEQ,
1626 ObjectIdGetDatum(RelationRelationId));
1627 ScanKeyInit(&skey[2],
1628 Anum_pg_description_objsubid,
1629 BTEqualStrategyNumber, F_INT4EQ,
1630 Int32GetDatum(0));
1632 description = table_open(DescriptionRelationId, RowExclusiveLock);
1634 sd = systable_beginscan(description, DescriptionObjIndexId, true,
1635 NULL, 3, skey);
1637 while ((tuple = systable_getnext(sd)) != NULL)
1639 tuple = heap_modify_tuple(tuple, RelationGetDescr(description),
1640 values, nulls, replaces);
1641 CatalogTupleUpdate(description, &tuple->t_self, tuple);
1643 break; /* Assume there can be only one match */
1646 systable_endscan(sd);
1647 table_close(description, NoLock);
1651 * Swap inheritance relationship with parent index
1653 if (get_rel_relispartition(oldIndexId))
1655 List *ancestors = get_partition_ancestors(oldIndexId);
1656 Oid parentIndexRelid = linitial_oid(ancestors);
1658 DeleteInheritsTuple(oldIndexId, parentIndexRelid);
1659 StoreSingleInheritance(newIndexId, parentIndexRelid, 1);
1661 list_free(ancestors);
1665 * Swap all dependencies of and on the old index to the new one, and
1666 * vice-versa. Note that a call to CommandCounterIncrement() would cause
1667 * duplicate entries in pg_depend, so this should not be done.
1669 changeDependenciesOf(RelationRelationId, newIndexId, oldIndexId);
1670 changeDependenciesOn(RelationRelationId, newIndexId, oldIndexId);
1672 changeDependenciesOf(RelationRelationId, oldIndexId, newIndexId);
1673 changeDependenciesOn(RelationRelationId, oldIndexId, newIndexId);
1676 * Copy over statistics from old to new index
1679 PgStat_StatTabEntry *tabentry;
1681 tabentry = pgstat_fetch_stat_tabentry(oldIndexId);
1682 if (tabentry)
1684 if (newClassRel->pgstat_info)
1686 newClassRel->pgstat_info->t_counts.t_numscans = tabentry->numscans;
1687 newClassRel->pgstat_info->t_counts.t_tuples_returned = tabentry->tuples_returned;
1688 newClassRel->pgstat_info->t_counts.t_tuples_fetched = tabentry->tuples_fetched;
1689 newClassRel->pgstat_info->t_counts.t_blocks_fetched = tabentry->blocks_fetched;
1690 newClassRel->pgstat_info->t_counts.t_blocks_hit = tabentry->blocks_hit;
1693 * The data will be sent by the next pgstat_report_stat()
1694 * call.
1700 /* Close relations */
1701 table_close(pg_class, RowExclusiveLock);
1702 table_close(pg_index, RowExclusiveLock);
1703 table_close(pg_constraint, RowExclusiveLock);
1704 table_close(pg_trigger, RowExclusiveLock);
1706 /* The lock taken previously is not released until the end of transaction */
1707 relation_close(oldClassRel, NoLock);
1708 relation_close(newClassRel, NoLock);
1712 * index_concurrently_set_dead
1714 * Perform the last invalidation stage of DROP INDEX CONCURRENTLY or REINDEX
1715 * CONCURRENTLY before actually dropping the index. After calling this
1716 * function, the index is seen by all the backends as dead. Low-level locks
1717 * taken here are kept until the end of the transaction calling this function.
1719 void
1720 index_concurrently_set_dead(Oid heapId, Oid indexId)
1722 Relation userHeapRelation;
1723 Relation userIndexRelation;
1726 * No more predicate locks will be acquired on this index, and we're about
1727 * to stop doing inserts into the index which could show conflicts with
1728 * existing predicate locks, so now is the time to move them to the heap
1729 * relation.
1731 userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock);
1732 userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
1733 TransferPredicateLocksToHeapRelation(userIndexRelation);
1736 * Now we are sure that nobody uses the index for queries; they just might
1737 * have it open for updating it. So now we can unset indisready and
1738 * indislive, then wait till nobody could be using it at all anymore.
1740 index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
1743 * Invalidate the relcache for the table, so that after this commit all
1744 * sessions will refresh the table's index list. Forgetting just the
1745 * index's relcache entry is not enough.
1747 CacheInvalidateRelcache(userHeapRelation);
1750 * Close the relations again, though still holding session lock.
1752 table_close(userHeapRelation, NoLock);
1753 index_close(userIndexRelation, NoLock);
1757 * index_constraint_create
1759 * Set up a constraint associated with an index. Return the new constraint's
1760 * address.
1762 * heapRelation: table owning the index (must be suitably locked by caller)
1763 * indexRelationId: OID of the index
1764 * parentConstraintId: if constraint is on a partition, the OID of the
1765 * constraint in the parent.
1766 * indexInfo: same info executor uses to insert into the index
1767 * constraintName: what it say (generally, should match name of index)
1768 * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
1769 * CONSTRAINT_EXCLUSION
1770 * flags: bitmask that can include any combination of these bits:
1771 * INDEX_CONSTR_CREATE_MARK_AS_PRIMARY: index is a PRIMARY KEY
1772 * INDEX_CONSTR_CREATE_DEFERRABLE: constraint is DEFERRABLE
1773 * INDEX_CONSTR_CREATE_INIT_DEFERRED: constraint is INITIALLY DEFERRED
1774 * INDEX_CONSTR_CREATE_UPDATE_INDEX: update the pg_index row
1775 * INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS: remove existing dependencies
1776 * of index on table's columns
1777 * allow_system_table_mods: allow table to be a system catalog
1778 * is_internal: index is constructed due to internal process
1780 ObjectAddress
1781 index_constraint_create(Relation heapRelation,
1782 Oid indexRelationId,
1783 Oid parentConstraintId,
1784 IndexInfo *indexInfo,
1785 const char *constraintName,
1786 char constraintType,
1787 bits16 constr_flags,
1788 bool allow_system_table_mods,
1789 bool is_internal)
1791 Oid namespaceId = RelationGetNamespace(heapRelation);
1792 ObjectAddress myself,
1793 idxaddr;
1794 Oid conOid;
1795 bool deferrable;
1796 bool initdeferred;
1797 bool mark_as_primary;
1798 bool islocal;
1799 bool noinherit;
1800 int inhcount;
1802 deferrable = (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) != 0;
1803 initdeferred = (constr_flags & INDEX_CONSTR_CREATE_INIT_DEFERRED) != 0;
1804 mark_as_primary = (constr_flags & INDEX_CONSTR_CREATE_MARK_AS_PRIMARY) != 0;
1806 /* constraint creation support doesn't work while bootstrapping */
1807 Assert(!IsBootstrapProcessingMode());
1809 /* enforce system-table restriction */
1810 if (!allow_system_table_mods &&
1811 IsSystemRelation(heapRelation) &&
1812 IsNormalProcessingMode())
1813 ereport(ERROR,
1814 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1815 errmsg("user-defined indexes on system catalog tables are not supported")));
1817 /* primary/unique constraints shouldn't have any expressions */
1818 if (indexInfo->ii_Expressions &&
1819 constraintType != CONSTRAINT_EXCLUSION)
1820 elog(ERROR, "constraints cannot have index expressions");
1823 * If we're manufacturing a constraint for a pre-existing index, we need
1824 * to get rid of the existing auto dependencies for the index (the ones
1825 * that index_create() would have made instead of calling this function).
1827 * Note: this code would not necessarily do the right thing if the index
1828 * has any expressions or predicate, but we'd never be turning such an
1829 * index into a UNIQUE or PRIMARY KEY constraint.
1831 if (constr_flags & INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS)
1832 deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
1833 RelationRelationId, DEPENDENCY_AUTO);
1835 if (OidIsValid(parentConstraintId))
1837 islocal = false;
1838 inhcount = 1;
1839 noinherit = false;
1841 else
1843 islocal = true;
1844 inhcount = 0;
1845 noinherit = true;
1849 * Construct a pg_constraint entry.
1851 conOid = CreateConstraintEntry(constraintName,
1852 namespaceId,
1853 constraintType,
1854 deferrable,
1855 initdeferred,
1856 true,
1857 parentConstraintId,
1858 RelationGetRelid(heapRelation),
1859 indexInfo->ii_IndexAttrNumbers,
1860 indexInfo->ii_NumIndexKeyAttrs,
1861 indexInfo->ii_NumIndexAttrs,
1862 InvalidOid, /* no domain */
1863 indexRelationId, /* index OID */
1864 InvalidOid, /* no foreign key */
1865 NULL,
1866 NULL,
1867 NULL,
1868 NULL,
1870 ' ',
1871 ' ',
1872 ' ',
1873 indexInfo->ii_ExclusionOps,
1874 NULL, /* no check constraint */
1875 NULL,
1876 islocal,
1877 inhcount,
1878 noinherit,
1879 is_internal);
1882 * Register the index as internally dependent on the constraint.
1884 * Note that the constraint has a dependency on the table, so we don't
1885 * need (or want) any direct dependency from the index to the table.
1887 ObjectAddressSet(myself, ConstraintRelationId, conOid);
1888 ObjectAddressSet(idxaddr, RelationRelationId, indexRelationId);
1889 recordDependencyOn(&idxaddr, &myself, DEPENDENCY_INTERNAL);
1892 * Also, if this is a constraint on a partition, give it partition-type
1893 * dependencies on the parent constraint as well as the table.
1895 if (OidIsValid(parentConstraintId))
1897 ObjectAddress referenced;
1899 ObjectAddressSet(referenced, ConstraintRelationId, parentConstraintId);
1900 recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI);
1901 ObjectAddressSet(referenced, RelationRelationId,
1902 RelationGetRelid(heapRelation));
1903 recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC);
1907 * If the constraint is deferrable, create the deferred uniqueness
1908 * checking trigger. (The trigger will be given an internal dependency on
1909 * the constraint by CreateTrigger.)
1911 if (deferrable)
1913 CreateTrigStmt *trigger;
1915 trigger = makeNode(CreateTrigStmt);
1916 trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
1917 "PK_ConstraintTrigger" :
1918 "Unique_ConstraintTrigger";
1919 trigger->relation = NULL;
1920 trigger->funcname = SystemFuncName("unique_key_recheck");
1921 trigger->args = NIL;
1922 trigger->row = true;
1923 trigger->timing = TRIGGER_TYPE_AFTER;
1924 trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
1925 trigger->columns = NIL;
1926 trigger->whenClause = NULL;
1927 trigger->isconstraint = true;
1928 trigger->deferrable = true;
1929 trigger->initdeferred = initdeferred;
1930 trigger->constrrel = NULL;
1932 (void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
1933 InvalidOid, conOid, indexRelationId, InvalidOid,
1934 InvalidOid, NULL, true, false);
1938 * If needed, mark the index as primary and/or deferred in pg_index.
1940 * Note: When making an existing index into a constraint, caller must have
1941 * a table lock that prevents concurrent table updates; otherwise, there
1942 * is a risk that concurrent readers of the table will miss seeing this
1943 * index at all.
1945 if ((constr_flags & INDEX_CONSTR_CREATE_UPDATE_INDEX) &&
1946 (mark_as_primary || deferrable))
1948 Relation pg_index;
1949 HeapTuple indexTuple;
1950 Form_pg_index indexForm;
1951 bool dirty = false;
1953 pg_index = table_open(IndexRelationId, RowExclusiveLock);
1955 indexTuple = SearchSysCacheCopy1(INDEXRELID,
1956 ObjectIdGetDatum(indexRelationId));
1957 if (!HeapTupleIsValid(indexTuple))
1958 elog(ERROR, "cache lookup failed for index %u", indexRelationId);
1959 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
1961 if (mark_as_primary && !indexForm->indisprimary)
1963 indexForm->indisprimary = true;
1964 dirty = true;
1967 if (deferrable && indexForm->indimmediate)
1969 indexForm->indimmediate = false;
1970 dirty = true;
1973 if (dirty)
1975 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
1977 InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
1978 InvalidOid, is_internal);
1981 heap_freetuple(indexTuple);
1982 table_close(pg_index, RowExclusiveLock);
1985 return myself;
1989 * index_drop
1991 * NOTE: this routine should now only be called through performDeletion(),
1992 * else associated dependencies won't be cleaned up.
1994 * If concurrent is true, do a DROP INDEX CONCURRENTLY. If concurrent is
1995 * false but concurrent_lock_mode is true, then do a normal DROP INDEX but
1996 * take a lock for CONCURRENTLY processing. That is used as part of REINDEX
1997 * CONCURRENTLY.
1999 void
2000 index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
2002 Oid heapId;
2003 Relation userHeapRelation;
2004 Relation userIndexRelation;
2005 Relation indexRelation;
2006 HeapTuple tuple;
2007 bool hasexprs;
2008 LockRelId heaprelid,
2009 indexrelid;
2010 LOCKTAG heaplocktag;
2011 LOCKMODE lockmode;
2014 * A temporary relation uses a non-concurrent DROP. Other backends can't
2015 * access a temporary relation, so there's no harm in grabbing a stronger
2016 * lock (see comments in RemoveRelations), and a non-concurrent DROP is
2017 * more efficient.
2019 Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP ||
2020 (!concurrent && !concurrent_lock_mode));
2023 * To drop an index safely, we must grab exclusive lock on its parent
2024 * table. Exclusive lock on the index alone is insufficient because
2025 * another backend might be about to execute a query on the parent table.
2026 * If it relies on a previously cached list of index OIDs, then it could
2027 * attempt to access the just-dropped index. We must therefore take a
2028 * table lock strong enough to prevent all queries on the table from
2029 * proceeding until we commit and send out a shared-cache-inval notice
2030 * that will make them update their index lists.
2032 * In the concurrent case we avoid this requirement by disabling index use
2033 * in multiple steps and waiting out any transactions that might be using
2034 * the index, so we don't need exclusive lock on the parent table. Instead
2035 * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
2036 * doing CREATE/DROP INDEX CONCURRENTLY on the same index. (We will get
2037 * AccessExclusiveLock on the index below, once we're sure nobody else is
2038 * using it.)
2040 heapId = IndexGetRelation(indexId, false);
2041 lockmode = (concurrent || concurrent_lock_mode) ? ShareUpdateExclusiveLock : AccessExclusiveLock;
2042 userHeapRelation = table_open(heapId, lockmode);
2043 userIndexRelation = index_open(indexId, lockmode);
2046 * We might still have open queries using it in our own session, which the
2047 * above locking won't prevent, so test explicitly.
2049 CheckTableNotInUse(userIndexRelation, "DROP INDEX");
2052 * Drop Index Concurrently is more or less the reverse process of Create
2053 * Index Concurrently.
2055 * First we unset indisvalid so queries starting afterwards don't use the
2056 * index to answer queries anymore. We have to keep indisready = true so
2057 * transactions that are still scanning the index can continue to see
2058 * valid index contents. For instance, if they are using READ COMMITTED
2059 * mode, and another transaction makes changes and commits, they need to
2060 * see those new tuples in the index.
2062 * After all transactions that could possibly have used the index for
2063 * queries end, we can unset indisready and indislive, then wait till
2064 * nobody could be touching it anymore. (Note: we need indislive because
2065 * this state must be distinct from the initial state during CREATE INDEX
2066 * CONCURRENTLY, which has indislive true while indisready and indisvalid
2067 * are false. That's because in that state, transactions must examine the
2068 * index for HOT-safety decisions, while in this state we don't want them
2069 * to open it at all.)
2071 * Since all predicate locks on the index are about to be made invalid, we
2072 * must promote them to predicate locks on the heap. In the
2073 * non-concurrent case we can just do that now. In the concurrent case
2074 * it's a bit trickier. The predicate locks must be moved when there are
2075 * no index scans in progress on the index and no more can subsequently
2076 * start, so that no new predicate locks can be made on the index. Also,
2077 * they must be moved before heap inserts stop maintaining the index, else
2078 * the conflict with the predicate lock on the index gap could be missed
2079 * before the lock on the heap relation is in place to detect a conflict
2080 * based on the heap tuple insert.
2082 if (concurrent)
2085 * We must commit our transaction in order to make the first pg_index
2086 * state update visible to other sessions. If the DROP machinery has
2087 * already performed any other actions (removal of other objects,
2088 * pg_depend entries, etc), the commit would make those actions
2089 * permanent, which would leave us with inconsistent catalog state if
2090 * we fail partway through the following sequence. Since DROP INDEX
2091 * CONCURRENTLY is restricted to dropping just one index that has no
2092 * dependencies, we should get here before anything's been done ---
2093 * but let's check that to be sure. We can verify that the current
2094 * transaction has not executed any transactional updates by checking
2095 * that no XID has been assigned.
2097 if (GetTopTransactionIdIfAny() != InvalidTransactionId)
2098 ereport(ERROR,
2099 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2100 errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
2103 * Mark index invalid by updating its pg_index entry
2105 index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);
2108 * Invalidate the relcache for the table, so that after this commit
2109 * all sessions will refresh any cached plans that might reference the
2110 * index.
2112 CacheInvalidateRelcache(userHeapRelation);
2114 /* save lockrelid and locktag for below, then close but keep locks */
2115 heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
2116 SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
2117 indexrelid = userIndexRelation->rd_lockInfo.lockRelId;
2119 table_close(userHeapRelation, NoLock);
2120 index_close(userIndexRelation, NoLock);
2123 * We must commit our current transaction so that the indisvalid
2124 * update becomes visible to other transactions; then start another.
2125 * Note that any previously-built data structures are lost in the
2126 * commit. The only data we keep past here are the relation IDs.
2128 * Before committing, get a session-level lock on the table, to ensure
2129 * that neither it nor the index can be dropped before we finish. This
2130 * cannot block, even if someone else is waiting for access, because
2131 * we already have the same lock within our transaction.
2133 LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
2134 LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
2136 PopActiveSnapshot();
2137 CommitTransactionCommand();
2138 StartTransactionCommand();
2141 * Now we must wait until no running transaction could be using the
2142 * index for a query. Use AccessExclusiveLock here to check for
2143 * running transactions that hold locks of any kind on the table. Note
2144 * we do not need to worry about xacts that open the table for reading
2145 * after this point; they will see the index as invalid when they open
2146 * the relation.
2148 * Note: the reason we use actual lock acquisition here, rather than
2149 * just checking the ProcArray and sleeping, is that deadlock is
2150 * possible if one of the transactions in question is blocked trying
2151 * to acquire an exclusive lock on our table. The lock code will
2152 * detect deadlock and error out properly.
2154 * Note: we report progress through WaitForLockers() unconditionally
2155 * here, even though it will only be used when we're called by REINDEX
2156 * CONCURRENTLY and not when called by DROP INDEX CONCURRENTLY.
2158 WaitForLockers(heaplocktag, AccessExclusiveLock, true);
2160 /* Finish invalidation of index and mark it as dead */
2161 index_concurrently_set_dead(heapId, indexId);
2164 * Again, commit the transaction to make the pg_index update visible
2165 * to other sessions.
2167 CommitTransactionCommand();
2168 StartTransactionCommand();
2171 * Wait till every transaction that saw the old index state has
2172 * finished. See above about progress reporting.
2174 WaitForLockers(heaplocktag, AccessExclusiveLock, true);
2177 * Re-open relations to allow us to complete our actions.
2179 * At this point, nothing should be accessing the index, but lets
2180 * leave nothing to chance and grab AccessExclusiveLock on the index
2181 * before the physical deletion.
2183 userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock);
2184 userIndexRelation = index_open(indexId, AccessExclusiveLock);
2186 else
2188 /* Not concurrent, so just transfer predicate locks and we're good */
2189 TransferPredicateLocksToHeapRelation(userIndexRelation);
2193 * Schedule physical removal of the files (if any)
2195 if (userIndexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
2196 RelationDropStorage(userIndexRelation);
2199 * Close and flush the index's relcache entry, to ensure relcache doesn't
2200 * try to rebuild it while we're deleting catalog entries. We keep the
2201 * lock though.
2203 index_close(userIndexRelation, NoLock);
2205 RelationForgetRelation(indexId);
2208 * fix INDEX relation, and check for expressional index
2210 indexRelation = table_open(IndexRelationId, RowExclusiveLock);
2212 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
2213 if (!HeapTupleIsValid(tuple))
2214 elog(ERROR, "cache lookup failed for index %u", indexId);
2216 hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs,
2217 RelationGetDescr(indexRelation));
2219 CatalogTupleDelete(indexRelation, &tuple->t_self);
2221 ReleaseSysCache(tuple);
2222 table_close(indexRelation, RowExclusiveLock);
2225 * if it has any expression columns, we might have stored statistics about
2226 * them.
2228 if (hasexprs)
2229 RemoveStatistics(indexId, 0);
2232 * fix ATTRIBUTE relation
2234 DeleteAttributeTuples(indexId);
2237 * fix RELATION relation
2239 DeleteRelationTuple(indexId);
2242 * fix INHERITS relation
2244 DeleteInheritsTuple(indexId, InvalidOid);
2247 * We are presently too lazy to attempt to compute the new correct value
2248 * of relhasindex (the next VACUUM will fix it if necessary). So there is
2249 * no need to update the pg_class tuple for the owning relation. But we
2250 * must send out a shared-cache-inval notice on the owning relation to
2251 * ensure other backends update their relcache lists of indexes. (In the
2252 * concurrent case, this is redundant but harmless.)
2254 CacheInvalidateRelcache(userHeapRelation);
2257 * Close owning rel, but keep lock
2259 table_close(userHeapRelation, NoLock);
2262 * Release the session locks before we go.
2264 if (concurrent)
2266 UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
2267 UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
2271 /* ----------------------------------------------------------------
2272 * index_build support
2273 * ----------------------------------------------------------------
2276 /* ----------------
2277 * BuildIndexInfo
2278 * Construct an IndexInfo record for an open index
2280 * IndexInfo stores the information about the index that's needed by
2281 * FormIndexDatum, which is used for both index_build() and later insertion
2282 * of individual index tuples. Normally we build an IndexInfo for an index
2283 * just once per command, and then use it for (potentially) many tuples.
2284 * ----------------
2286 IndexInfo *
2287 BuildIndexInfo(Relation index)
2289 IndexInfo *ii;
2290 Form_pg_index indexStruct = index->rd_index;
2291 int i;
2292 int numAtts;
2294 /* check the number of keys, and copy attr numbers into the IndexInfo */
2295 numAtts = indexStruct->indnatts;
2296 if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
2297 elog(ERROR, "invalid indnatts %d for index %u",
2298 numAtts, RelationGetRelid(index));
2301 * Create the node, fetching any expressions needed for expressional
2302 * indexes and index predicate if any.
2304 ii = makeIndexInfo(indexStruct->indnatts,
2305 indexStruct->indnkeyatts,
2306 index->rd_rel->relam,
2307 RelationGetIndexExpressions(index),
2308 RelationGetIndexPredicate(index),
2309 indexStruct->indisunique,
2310 indexStruct->indisready,
2311 false);
2313 /* fill in attribute numbers */
2314 for (i = 0; i < numAtts; i++)
2315 ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
2317 /* fetch exclusion constraint info if any */
2318 if (indexStruct->indisexclusion)
2320 RelationGetExclusionInfo(index,
2321 &ii->ii_ExclusionOps,
2322 &ii->ii_ExclusionProcs,
2323 &ii->ii_ExclusionStrats);
2326 ii->ii_OpclassOptions = RelationGetIndexRawAttOptions(index);
2328 return ii;
2331 /* ----------------
2332 * BuildDummyIndexInfo
2333 * Construct a dummy IndexInfo record for an open index
2335 * This differs from the real BuildIndexInfo in that it will never run any
2336 * user-defined code that might exist in index expressions or predicates.
2337 * Instead of the real index expressions, we return null constants that have
2338 * the right types/typmods/collations. Predicates and exclusion clauses are
2339 * just ignored. This is sufficient for the purpose of truncating an index,
2340 * since we will not need to actually evaluate the expressions or predicates;
2341 * the only thing that's likely to be done with the data is construction of
2342 * a tupdesc describing the index's rowtype.
2343 * ----------------
2345 IndexInfo *
2346 BuildDummyIndexInfo(Relation index)
2348 IndexInfo *ii;
2349 Form_pg_index indexStruct = index->rd_index;
2350 int i;
2351 int numAtts;
2353 /* check the number of keys, and copy attr numbers into the IndexInfo */
2354 numAtts = indexStruct->indnatts;
2355 if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
2356 elog(ERROR, "invalid indnatts %d for index %u",
2357 numAtts, RelationGetRelid(index));
2360 * Create the node, using dummy index expressions, and pretending there is
2361 * no predicate.
2363 ii = makeIndexInfo(indexStruct->indnatts,
2364 indexStruct->indnkeyatts,
2365 index->rd_rel->relam,
2366 RelationGetDummyIndexExpressions(index),
2367 NIL,
2368 indexStruct->indisunique,
2369 indexStruct->indisready,
2370 false);
2372 /* fill in attribute numbers */
2373 for (i = 0; i < numAtts; i++)
2374 ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
2376 /* We ignore the exclusion constraint if any */
2378 return ii;
2382 * CompareIndexInfo
2383 * Return whether the properties of two indexes (in different tables)
2384 * indicate that they have the "same" definitions.
2386 * Note: passing collations and opfamilies separately is a kludge. Adding
2387 * them to IndexInfo may result in better coding here and elsewhere.
2389 * Use build_attrmap_by_name(index2, index1) to build the attmap.
2391 bool
2392 CompareIndexInfo(IndexInfo *info1, IndexInfo *info2,
2393 Oid *collations1, Oid *collations2,
2394 Oid *opfamilies1, Oid *opfamilies2,
2395 AttrMap *attmap)
2397 int i;
2399 if (info1->ii_Unique != info2->ii_Unique)
2400 return false;
2402 /* indexes are only equivalent if they have the same access method */
2403 if (info1->ii_Am != info2->ii_Am)
2404 return false;
2406 /* and same number of attributes */
2407 if (info1->ii_NumIndexAttrs != info2->ii_NumIndexAttrs)
2408 return false;
2410 /* and same number of key attributes */
2411 if (info1->ii_NumIndexKeyAttrs != info2->ii_NumIndexKeyAttrs)
2412 return false;
2415 * and columns match through the attribute map (actual attribute numbers
2416 * might differ!) Note that this implies that index columns that are
2417 * expressions appear in the same positions. We will next compare the
2418 * expressions themselves.
2420 for (i = 0; i < info1->ii_NumIndexAttrs; i++)
2422 if (attmap->maplen < info2->ii_IndexAttrNumbers[i])
2423 elog(ERROR, "incorrect attribute map");
2425 /* ignore expressions at this stage */
2426 if ((info1->ii_IndexAttrNumbers[i] != InvalidAttrNumber) &&
2427 (attmap->attnums[info2->ii_IndexAttrNumbers[i] - 1] !=
2428 info1->ii_IndexAttrNumbers[i]))
2429 return false;
2431 /* collation and opfamily is not valid for including columns */
2432 if (i >= info1->ii_NumIndexKeyAttrs)
2433 continue;
2435 if (collations1[i] != collations2[i])
2436 return false;
2437 if (opfamilies1[i] != opfamilies2[i])
2438 return false;
2442 * For expression indexes: either both are expression indexes, or neither
2443 * is; if they are, make sure the expressions match.
2445 if ((info1->ii_Expressions != NIL) != (info2->ii_Expressions != NIL))
2446 return false;
2447 if (info1->ii_Expressions != NIL)
2449 bool found_whole_row;
2450 Node *mapped;
2452 mapped = map_variable_attnos((Node *) info2->ii_Expressions,
2453 1, 0, attmap,
2454 InvalidOid, &found_whole_row);
2455 if (found_whole_row)
2458 * we could throw an error here, but seems out of scope for this
2459 * routine.
2461 return false;
2464 if (!equal(info1->ii_Expressions, mapped))
2465 return false;
2468 /* Partial index predicates must be identical, if they exist */
2469 if ((info1->ii_Predicate == NULL) != (info2->ii_Predicate == NULL))
2470 return false;
2471 if (info1->ii_Predicate != NULL)
2473 bool found_whole_row;
2474 Node *mapped;
2476 mapped = map_variable_attnos((Node *) info2->ii_Predicate,
2477 1, 0, attmap,
2478 InvalidOid, &found_whole_row);
2479 if (found_whole_row)
2482 * we could throw an error here, but seems out of scope for this
2483 * routine.
2485 return false;
2487 if (!equal(info1->ii_Predicate, mapped))
2488 return false;
2491 /* No support currently for comparing exclusion indexes. */
2492 if (info1->ii_ExclusionOps != NULL || info2->ii_ExclusionOps != NULL)
2493 return false;
2495 return true;
2498 /* ----------------
2499 * BuildSpeculativeIndexInfo
2500 * Add extra state to IndexInfo record
2502 * For unique indexes, we usually don't want to add info to the IndexInfo for
2503 * checking uniqueness, since the B-Tree AM handles that directly. However,
2504 * in the case of speculative insertion, additional support is required.
2506 * Do this processing here rather than in BuildIndexInfo() to not incur the
2507 * overhead in the common non-speculative cases.
2508 * ----------------
2510 void
2511 BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
2513 int indnkeyatts;
2514 int i;
2516 indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
2519 * fetch info for checking unique indexes
2521 Assert(ii->ii_Unique);
2523 if (index->rd_rel->relam != BTREE_AM_OID)
2524 elog(ERROR, "unexpected non-btree speculative unique index");
2526 ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
2527 ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
2528 ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
2531 * We have to look up the operator's strategy number. This provides a
2532 * cross-check that the operator does match the index.
2534 /* We need the func OIDs and strategy numbers too */
2535 for (i = 0; i < indnkeyatts; i++)
2537 ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
2538 ii->ii_UniqueOps[i] =
2539 get_opfamily_member(index->rd_opfamily[i],
2540 index->rd_opcintype[i],
2541 index->rd_opcintype[i],
2542 ii->ii_UniqueStrats[i]);
2543 if (!OidIsValid(ii->ii_UniqueOps[i]))
2544 elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
2545 ii->ii_UniqueStrats[i], index->rd_opcintype[i],
2546 index->rd_opcintype[i], index->rd_opfamily[i]);
2547 ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
2551 /* ----------------
2552 * FormIndexDatum
2553 * Construct values[] and isnull[] arrays for a new index tuple.
2555 * indexInfo Info about the index
2556 * slot Heap tuple for which we must prepare an index entry
2557 * estate executor state for evaluating any index expressions
2558 * values Array of index Datums (output area)
2559 * isnull Array of is-null indicators (output area)
2561 * When there are no index expressions, estate may be NULL. Otherwise it
2562 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
2563 * context must point to the heap tuple passed in.
2565 * Notice we don't actually call index_form_tuple() here; we just prepare
2566 * its input arrays values[] and isnull[]. This is because the index AM
2567 * may wish to alter the data before storage.
2568 * ----------------
2570 void
2571 FormIndexDatum(IndexInfo *indexInfo,
2572 TupleTableSlot *slot,
2573 EState *estate,
2574 Datum *values,
2575 bool *isnull)
2577 ListCell *indexpr_item;
2578 int i;
2580 if (indexInfo->ii_Expressions != NIL &&
2581 indexInfo->ii_ExpressionsState == NIL)
2583 /* First time through, set up expression evaluation state */
2584 indexInfo->ii_ExpressionsState =
2585 ExecPrepareExprList(indexInfo->ii_Expressions, estate);
2586 /* Check caller has set up context correctly */
2587 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
2589 indexpr_item = list_head(indexInfo->ii_ExpressionsState);
2591 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
2593 int keycol = indexInfo->ii_IndexAttrNumbers[i];
2594 Datum iDatum;
2595 bool isNull;
2597 if (keycol < 0)
2598 iDatum = slot_getsysattr(slot, keycol, &isNull);
2599 else if (keycol != 0)
2602 * Plain index column; get the value we need directly from the
2603 * heap tuple.
2605 iDatum = slot_getattr(slot, keycol, &isNull);
2607 else
2610 * Index expression --- need to evaluate it.
2612 if (indexpr_item == NULL)
2613 elog(ERROR, "wrong number of index expressions");
2614 iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
2615 GetPerTupleExprContext(estate),
2616 &isNull);
2617 indexpr_item = lnext(indexInfo->ii_ExpressionsState, indexpr_item);
2619 values[i] = iDatum;
2620 isnull[i] = isNull;
2623 if (indexpr_item != NULL)
2624 elog(ERROR, "wrong number of index expressions");
2629 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
2631 * This routine updates the pg_class row of either an index or its parent
2632 * relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed
2633 * to ensure we can do all the necessary work in just one update.
2635 * hasindex: set relhasindex to this value
2636 * reltuples: if >= 0, set reltuples to this value; else no change
2638 * If reltuples >= 0, relpages and relallvisible are also updated (using
2639 * RelationGetNumberOfBlocks() and visibilitymap_count()).
2641 * NOTE: an important side-effect of this operation is that an SI invalidation
2642 * message is sent out to all backends --- including me --- causing relcache
2643 * entries to be flushed or updated with the new data. This must happen even
2644 * if we find that no change is needed in the pg_class row. When updating
2645 * a heap entry, this ensures that other backends find out about the new
2646 * index. When updating an index, it's important because some index AMs
2647 * expect a relcache flush to occur after REINDEX.
2649 static void
2650 index_update_stats(Relation rel,
2651 bool hasindex,
2652 double reltuples)
2654 Oid relid = RelationGetRelid(rel);
2655 Relation pg_class;
2656 HeapTuple tuple;
2657 Form_pg_class rd_rel;
2658 bool dirty;
2661 * We always update the pg_class row using a non-transactional,
2662 * overwrite-in-place update. There are several reasons for this:
2664 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
2666 * 2. We could be reindexing pg_class itself, in which case we can't move
2667 * its pg_class row because CatalogTupleInsert/CatalogTupleUpdate might
2668 * not know about all the indexes yet (see reindex_relation).
2670 * 3. Because we execute CREATE INDEX with just share lock on the parent
2671 * rel (to allow concurrent index creations), an ordinary update could
2672 * suffer a tuple-concurrently-updated failure against another CREATE
2673 * INDEX committing at about the same time. We can avoid that by having
2674 * them both do nontransactional updates (we assume they will both be
2675 * trying to change the pg_class row to the same thing, so it doesn't
2676 * matter which goes first).
2678 * It is safe to use a non-transactional update even though our
2679 * transaction could still fail before committing. Setting relhasindex
2680 * true is safe even if there are no indexes (VACUUM will eventually fix
2681 * it). And of course the new relpages and reltuples counts are correct
2682 * regardless. However, we don't want to change relpages (or
2683 * relallvisible) if the caller isn't providing an updated reltuples
2684 * count, because that would bollix the reltuples/relpages ratio which is
2685 * what's really important.
2688 pg_class = table_open(RelationRelationId, RowExclusiveLock);
2691 * Make a copy of the tuple to update. Normally we use the syscache, but
2692 * we can't rely on that during bootstrap or while reindexing pg_class
2693 * itself.
2695 if (IsBootstrapProcessingMode() ||
2696 ReindexIsProcessingHeap(RelationRelationId))
2698 /* don't assume syscache will work */
2699 TableScanDesc pg_class_scan;
2700 ScanKeyData key[1];
2702 ScanKeyInit(&key[0],
2703 Anum_pg_class_oid,
2704 BTEqualStrategyNumber, F_OIDEQ,
2705 ObjectIdGetDatum(relid));
2707 pg_class_scan = table_beginscan_catalog(pg_class, 1, key);
2708 tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
2709 tuple = heap_copytuple(tuple);
2710 table_endscan(pg_class_scan);
2712 else
2714 /* normal case, use syscache */
2715 tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
2718 if (!HeapTupleIsValid(tuple))
2719 elog(ERROR, "could not find tuple for relation %u", relid);
2720 rd_rel = (Form_pg_class) GETSTRUCT(tuple);
2722 /* Should this be a more comprehensive test? */
2723 Assert(rd_rel->relkind != RELKIND_PARTITIONED_INDEX);
2725 /* Apply required updates, if any, to copied tuple */
2727 dirty = false;
2728 if (rd_rel->relhasindex != hasindex)
2730 rd_rel->relhasindex = hasindex;
2731 dirty = true;
2734 if (reltuples >= 0)
2736 BlockNumber relpages = RelationGetNumberOfBlocks(rel);
2737 BlockNumber relallvisible;
2739 if (rd_rel->relkind != RELKIND_INDEX)
2740 visibilitymap_count(rel, &relallvisible, NULL);
2741 else /* don't bother for indexes */
2742 relallvisible = 0;
2744 if (rd_rel->relpages != (int32) relpages)
2746 rd_rel->relpages = (int32) relpages;
2747 dirty = true;
2749 if (rd_rel->reltuples != (float4) reltuples)
2751 rd_rel->reltuples = (float4) reltuples;
2752 dirty = true;
2754 if (rd_rel->relallvisible != (int32) relallvisible)
2756 rd_rel->relallvisible = (int32) relallvisible;
2757 dirty = true;
2762 * If anything changed, write out the tuple
2764 if (dirty)
2766 heap_inplace_update(pg_class, tuple);
2767 /* the above sends a cache inval message */
2769 else
2771 /* no need to change tuple, but force relcache inval anyway */
2772 CacheInvalidateRelcacheByTuple(tuple);
2775 heap_freetuple(tuple);
2777 table_close(pg_class, RowExclusiveLock);
2782 * index_build - invoke access-method-specific index build procedure
2784 * On entry, the index's catalog entries are valid, and its physical disk
2785 * file has been created but is empty. We call the AM-specific build
2786 * procedure to fill in the index contents. We then update the pg_class
2787 * entries of the index and heap relation as needed, using statistics
2788 * returned by ambuild as well as data passed by the caller.
2790 * isreindex indicates we are recreating a previously-existing index.
2791 * parallel indicates if parallelism may be useful.
2793 * Note: before Postgres 8.2, the passed-in heap and index Relations
2794 * were automatically closed by this routine. This is no longer the case.
2795 * The caller opened 'em, and the caller should close 'em.
2797 void
2798 index_build(Relation heapRelation,
2799 Relation indexRelation,
2800 IndexInfo *indexInfo,
2801 bool isreindex,
2802 bool parallel)
2804 IndexBuildResult *stats;
2805 Oid save_userid;
2806 int save_sec_context;
2807 int save_nestlevel;
2810 * sanity checks
2812 Assert(RelationIsValid(indexRelation));
2813 Assert(PointerIsValid(indexRelation->rd_indam));
2814 Assert(PointerIsValid(indexRelation->rd_indam->ambuild));
2815 Assert(PointerIsValid(indexRelation->rd_indam->ambuildempty));
2818 * Determine worker process details for parallel CREATE INDEX. Currently,
2819 * only btree has support for parallel builds.
2821 * Note that planner considers parallel safety for us.
2823 if (parallel && IsNormalProcessingMode() &&
2824 indexRelation->rd_rel->relam == BTREE_AM_OID)
2825 indexInfo->ii_ParallelWorkers =
2826 plan_create_index_workers(RelationGetRelid(heapRelation),
2827 RelationGetRelid(indexRelation));
2829 if (indexInfo->ii_ParallelWorkers == 0)
2830 ereport(DEBUG1,
2831 (errmsg("building index \"%s\" on table \"%s\" serially",
2832 RelationGetRelationName(indexRelation),
2833 RelationGetRelationName(heapRelation))));
2834 else
2835 ereport(DEBUG1,
2836 (errmsg_plural("building index \"%s\" on table \"%s\" with request for %d parallel worker",
2837 "building index \"%s\" on table \"%s\" with request for %d parallel workers",
2838 indexInfo->ii_ParallelWorkers,
2839 RelationGetRelationName(indexRelation),
2840 RelationGetRelationName(heapRelation),
2841 indexInfo->ii_ParallelWorkers)));
2844 * Switch to the table owner's userid, so that any index functions are run
2845 * as that user. Also lock down security-restricted operations and
2846 * arrange to make GUC variable changes local to this command.
2848 GetUserIdAndSecContext(&save_userid, &save_sec_context);
2849 SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2850 save_sec_context | SECURITY_RESTRICTED_OPERATION);
2851 save_nestlevel = NewGUCNestLevel();
2853 /* Set up initial progress report status */
2855 const int index[] = {
2856 PROGRESS_CREATEIDX_PHASE,
2857 PROGRESS_CREATEIDX_SUBPHASE,
2858 PROGRESS_CREATEIDX_TUPLES_DONE,
2859 PROGRESS_CREATEIDX_TUPLES_TOTAL,
2860 PROGRESS_SCAN_BLOCKS_DONE,
2861 PROGRESS_SCAN_BLOCKS_TOTAL
2863 const int64 val[] = {
2864 PROGRESS_CREATEIDX_PHASE_BUILD,
2865 PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE,
2866 0, 0, 0, 0
2869 pgstat_progress_update_multi_param(6, index, val);
2873 * Call the access method's build procedure
2875 stats = indexRelation->rd_indam->ambuild(heapRelation, indexRelation,
2876 indexInfo);
2877 Assert(PointerIsValid(stats));
2880 * If this is an unlogged index, we may need to write out an init fork for
2881 * it -- but we must first check whether one already exists. If, for
2882 * example, an unlogged relation is truncated in the transaction that
2883 * created it, or truncated twice in a subsequent transaction, the
2884 * relfilenode won't change, and nothing needs to be done here.
2886 if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
2887 !smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
2889 RelationOpenSmgr(indexRelation);
2890 smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
2891 indexRelation->rd_indam->ambuildempty(indexRelation);
2895 * If we found any potentially broken HOT chains, mark the index as not
2896 * being usable until the current transaction is below the event horizon.
2897 * See src/backend/access/heap/README.HOT for discussion. Also set this
2898 * if early pruning/vacuuming is enabled for the heap relation. While it
2899 * might become safe to use the index earlier based on actual cleanup
2900 * activity and other active transactions, the test for that would be much
2901 * more complex and would require some form of blocking, so keep it simple
2902 * and fast by just using the current transaction.
2904 * However, when reindexing an existing index, we should do nothing here.
2905 * Any HOT chains that are broken with respect to the index must predate
2906 * the index's original creation, so there is no need to change the
2907 * index's usability horizon. Moreover, we *must not* try to change the
2908 * index's pg_index entry while reindexing pg_index itself, and this
2909 * optimization nicely prevents that. The more complex rules needed for a
2910 * reindex are handled separately after this function returns.
2912 * We also need not set indcheckxmin during a concurrent index build,
2913 * because we won't set indisvalid true until all transactions that care
2914 * about the broken HOT chains or early pruning/vacuuming are gone.
2916 * Therefore, this code path can only be taken during non-concurrent
2917 * CREATE INDEX. Thus the fact that heap_update will set the pg_index
2918 * tuple's xmin doesn't matter, because that tuple was created in the
2919 * current transaction anyway. That also means we don't need to worry
2920 * about any concurrent readers of the tuple; no other transaction can see
2921 * it yet.
2923 if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
2924 !isreindex &&
2925 !indexInfo->ii_Concurrent)
2927 Oid indexId = RelationGetRelid(indexRelation);
2928 Relation pg_index;
2929 HeapTuple indexTuple;
2930 Form_pg_index indexForm;
2932 pg_index = table_open(IndexRelationId, RowExclusiveLock);
2934 indexTuple = SearchSysCacheCopy1(INDEXRELID,
2935 ObjectIdGetDatum(indexId));
2936 if (!HeapTupleIsValid(indexTuple))
2937 elog(ERROR, "cache lookup failed for index %u", indexId);
2938 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2940 /* If it's a new index, indcheckxmin shouldn't be set ... */
2941 Assert(!indexForm->indcheckxmin);
2943 indexForm->indcheckxmin = true;
2944 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
2946 heap_freetuple(indexTuple);
2947 table_close(pg_index, RowExclusiveLock);
2951 * Update heap and index pg_class rows
2953 index_update_stats(heapRelation,
2954 true,
2955 stats->heap_tuples);
2957 index_update_stats(indexRelation,
2958 false,
2959 stats->index_tuples);
2961 /* Make the updated catalog row versions visible */
2962 CommandCounterIncrement();
2965 * If it's for an exclusion constraint, make a second pass over the heap
2966 * to verify that the constraint is satisfied. We must not do this until
2967 * the index is fully valid. (Broken HOT chains shouldn't matter, though;
2968 * see comments for IndexCheckExclusion.)
2970 if (indexInfo->ii_ExclusionOps != NULL)
2971 IndexCheckExclusion(heapRelation, indexRelation, indexInfo);
2973 /* Roll back any GUC changes executed by index functions */
2974 AtEOXact_GUC(false, save_nestlevel);
2976 /* Restore userid and security context */
2977 SetUserIdAndSecContext(save_userid, save_sec_context);
2981 * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
2983 * When creating an exclusion constraint, we first build the index normally
2984 * and then rescan the heap to check for conflicts. We assume that we only
2985 * need to validate tuples that are live according to an up-to-date snapshot,
2986 * and that these were correctly indexed even in the presence of broken HOT
2987 * chains. This should be OK since we are holding at least ShareLock on the
2988 * table, meaning there can be no uncommitted updates from other transactions.
2989 * (Note: that wouldn't necessarily work for system catalogs, since many
2990 * operations release write lock early on the system catalogs.)
2992 static void
2993 IndexCheckExclusion(Relation heapRelation,
2994 Relation indexRelation,
2995 IndexInfo *indexInfo)
2997 TableScanDesc scan;
2998 Datum values[INDEX_MAX_KEYS];
2999 bool isnull[INDEX_MAX_KEYS];
3000 ExprState *predicate;
3001 TupleTableSlot *slot;
3002 EState *estate;
3003 ExprContext *econtext;
3004 Snapshot snapshot;
3007 * If we are reindexing the target index, mark it as no longer being
3008 * reindexed, to forestall an Assert in index_beginscan when we try to use
3009 * the index for probes. This is OK because the index is now fully valid.
3011 if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
3012 ResetReindexProcessing();
3015 * Need an EState for evaluation of index expressions and partial-index
3016 * predicates. Also a slot to hold the current tuple.
3018 estate = CreateExecutorState();
3019 econtext = GetPerTupleExprContext(estate);
3020 slot = table_slot_create(heapRelation, NULL);
3022 /* Arrange for econtext's scan tuple to be the tuple under test */
3023 econtext->ecxt_scantuple = slot;
3025 /* Set up execution state for predicate, if any. */
3026 predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
3029 * Scan all live tuples in the base relation.
3031 snapshot = RegisterSnapshot(GetLatestSnapshot());
3032 scan = table_beginscan_strat(heapRelation, /* relation */
3033 snapshot, /* snapshot */
3034 0, /* number of keys */
3035 NULL, /* scan key */
3036 true, /* buffer access strategy OK */
3037 true); /* syncscan OK */
3039 while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
3041 CHECK_FOR_INTERRUPTS();
3044 * In a partial index, ignore tuples that don't satisfy the predicate.
3046 if (predicate != NULL)
3048 if (!ExecQual(predicate, econtext))
3049 continue;
3053 * Extract index column values, including computing expressions.
3055 FormIndexDatum(indexInfo,
3056 slot,
3057 estate,
3058 values,
3059 isnull);
3062 * Check that this tuple has no conflicts.
3064 check_exclusion_constraint(heapRelation,
3065 indexRelation, indexInfo,
3066 &(slot->tts_tid), values, isnull,
3067 estate, true);
3069 MemoryContextReset(econtext->ecxt_per_tuple_memory);
3072 table_endscan(scan);
3073 UnregisterSnapshot(snapshot);
3075 ExecDropSingleTupleTableSlot(slot);
3077 FreeExecutorState(estate);
3079 /* These may have been pointing to the now-gone estate */
3080 indexInfo->ii_ExpressionsState = NIL;
3081 indexInfo->ii_PredicateState = NULL;
3086 * validate_index - support code for concurrent index builds
3088 * We do a concurrent index build by first inserting the catalog entry for the
3089 * index via index_create(), marking it not indisready and not indisvalid.
3090 * Then we commit our transaction and start a new one, then we wait for all
3091 * transactions that could have been modifying the table to terminate. Now
3092 * we know that any subsequently-started transactions will see the index and
3093 * honor its constraints on HOT updates; so while existing HOT-chains might
3094 * be broken with respect to the index, no currently live tuple will have an
3095 * incompatible HOT update done to it. We now build the index normally via
3096 * index_build(), while holding a weak lock that allows concurrent
3097 * insert/update/delete. Also, we index only tuples that are valid
3098 * as of the start of the scan (see table_index_build_scan), whereas a normal
3099 * build takes care to include recently-dead tuples. This is OK because
3100 * we won't mark the index valid until all transactions that might be able
3101 * to see those tuples are gone. The reason for doing that is to avoid
3102 * bogus unique-index failures due to concurrent UPDATEs (we might see
3103 * different versions of the same row as being valid when we pass over them,
3104 * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
3105 * does not contain any tuples added to the table while we built the index.
3107 * Next, we mark the index "indisready" (but still not "indisvalid") and
3108 * commit the second transaction and start a third. Again we wait for all
3109 * transactions that could have been modifying the table to terminate. Now
3110 * we know that any subsequently-started transactions will see the index and
3111 * insert their new tuples into it. We then take a new reference snapshot
3112 * which is passed to validate_index(). Any tuples that are valid according
3113 * to this snap, but are not in the index, must be added to the index.
3114 * (Any tuples committed live after the snap will be inserted into the
3115 * index by their originating transaction. Any tuples committed dead before
3116 * the snap need not be indexed, because we will wait out all transactions
3117 * that might care about them before we mark the index valid.)
3119 * validate_index() works by first gathering all the TIDs currently in the
3120 * index, using a bulkdelete callback that just stores the TIDs and doesn't
3121 * ever say "delete it". (This should be faster than a plain indexscan;
3122 * also, not all index AMs support full-index indexscan.) Then we sort the
3123 * TIDs, and finally scan the table doing a "merge join" against the TID list
3124 * to see which tuples are missing from the index. Thus we will ensure that
3125 * all tuples valid according to the reference snapshot are in the index.
3127 * Building a unique index this way is tricky: we might try to insert a
3128 * tuple that is already dead or is in process of being deleted, and we
3129 * mustn't have a uniqueness failure against an updated version of the same
3130 * row. We could try to check the tuple to see if it's already dead and tell
3131 * index_insert() not to do the uniqueness check, but that still leaves us
3132 * with a race condition against an in-progress update. To handle that,
3133 * we expect the index AM to recheck liveness of the to-be-inserted tuple
3134 * before it declares a uniqueness error.
3136 * After completing validate_index(), we wait until all transactions that
3137 * were alive at the time of the reference snapshot are gone; this is
3138 * necessary to be sure there are none left with a transaction snapshot
3139 * older than the reference (and hence possibly able to see tuples we did
3140 * not index). Then we mark the index "indisvalid" and commit. Subsequent
3141 * transactions will be able to use it for queries.
3143 * Doing two full table scans is a brute-force strategy. We could try to be
3144 * cleverer, eg storing new tuples in a special area of the table (perhaps
3145 * making the table append-only by setting use_fsm). However that would
3146 * add yet more locking issues.
3148 void
3149 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
3151 Relation heapRelation,
3152 indexRelation;
3153 IndexInfo *indexInfo;
3154 IndexVacuumInfo ivinfo;
3155 ValidateIndexState state;
3156 Oid save_userid;
3157 int save_sec_context;
3158 int save_nestlevel;
3161 const int index[] = {
3162 PROGRESS_CREATEIDX_PHASE,
3163 PROGRESS_CREATEIDX_TUPLES_DONE,
3164 PROGRESS_CREATEIDX_TUPLES_TOTAL,
3165 PROGRESS_SCAN_BLOCKS_DONE,
3166 PROGRESS_SCAN_BLOCKS_TOTAL
3168 const int64 val[] = {
3169 PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN,
3170 0, 0, 0, 0
3173 pgstat_progress_update_multi_param(5, index, val);
3176 /* Open and lock the parent heap relation */
3177 heapRelation = table_open(heapId, ShareUpdateExclusiveLock);
3178 /* And the target index relation */
3179 indexRelation = index_open(indexId, RowExclusiveLock);
3182 * Fetch info needed for index_insert. (You might think this should be
3183 * passed in from DefineIndex, but its copy is long gone due to having
3184 * been built in a previous transaction.)
3186 indexInfo = BuildIndexInfo(indexRelation);
3188 /* mark build is concurrent just for consistency */
3189 indexInfo->ii_Concurrent = true;
3192 * Switch to the table owner's userid, so that any index functions are run
3193 * as that user. Also lock down security-restricted operations and
3194 * arrange to make GUC variable changes local to this command.
3196 GetUserIdAndSecContext(&save_userid, &save_sec_context);
3197 SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
3198 save_sec_context | SECURITY_RESTRICTED_OPERATION);
3199 save_nestlevel = NewGUCNestLevel();
3202 * Scan the index and gather up all the TIDs into a tuplesort object.
3204 ivinfo.index = indexRelation;
3205 ivinfo.analyze_only = false;
3206 ivinfo.report_progress = true;
3207 ivinfo.estimated_count = true;
3208 ivinfo.message_level = DEBUG2;
3209 ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
3210 ivinfo.strategy = NULL;
3213 * Encode TIDs as int8 values for the sort, rather than directly sorting
3214 * item pointers. This can be significantly faster, primarily because TID
3215 * is a pass-by-reference type on all platforms, whereas int8 is
3216 * pass-by-value on most platforms.
3218 state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
3219 InvalidOid, false,
3220 maintenance_work_mem,
3221 NULL, false);
3222 state.htups = state.itups = state.tups_inserted = 0;
3224 /* ambulkdelete updates progress metrics */
3225 (void) index_bulk_delete(&ivinfo, NULL,
3226 validate_index_callback, (void *) &state);
3228 /* Execute the sort */
3230 const int index[] = {
3231 PROGRESS_CREATEIDX_PHASE,
3232 PROGRESS_SCAN_BLOCKS_DONE,
3233 PROGRESS_SCAN_BLOCKS_TOTAL
3235 const int64 val[] = {
3236 PROGRESS_CREATEIDX_PHASE_VALIDATE_SORT,
3237 0, 0
3240 pgstat_progress_update_multi_param(3, index, val);
3242 tuplesort_performsort(state.tuplesort);
3245 * Now scan the heap and "merge" it with the index
3247 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3248 PROGRESS_CREATEIDX_PHASE_VALIDATE_TABLESCAN);
3249 table_index_validate_scan(heapRelation,
3250 indexRelation,
3251 indexInfo,
3252 snapshot,
3253 &state);
3255 /* Done with tuplesort object */
3256 tuplesort_end(state.tuplesort);
3258 elog(DEBUG2,
3259 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
3260 state.htups, state.itups, state.tups_inserted);
3262 /* Roll back any GUC changes executed by index functions */
3263 AtEOXact_GUC(false, save_nestlevel);
3265 /* Restore userid and security context */
3266 SetUserIdAndSecContext(save_userid, save_sec_context);
3268 /* Close rels, but keep locks */
3269 index_close(indexRelation, NoLock);
3270 table_close(heapRelation, NoLock);
3274 * validate_index_callback - bulkdelete callback to collect the index TIDs
3276 static bool
3277 validate_index_callback(ItemPointer itemptr, void *opaque)
3279 ValidateIndexState *state = (ValidateIndexState *) opaque;
3280 int64 encoded = itemptr_encode(itemptr);
3282 tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
3283 state->itups += 1;
3284 return false; /* never actually delete anything */
3288 * index_set_state_flags - adjust pg_index state flags
3290 * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
3291 * flags that denote the index's state. Because the update is not
3292 * transactional and will not roll back on error, this must only be used as
3293 * the last step in a transaction that has not made any transactional catalog
3294 * updates!
3296 * Note that heap_inplace_update does send a cache inval message for the
3297 * tuple, so other sessions will hear about the update as soon as we commit.
3299 * NB: In releases prior to PostgreSQL 9.4, the use of a non-transactional
3300 * update here would have been unsafe; now that MVCC rules apply even for
3301 * system catalog scans, we could potentially use a transactional update here
3302 * instead.
3304 void
3305 index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
3307 Relation pg_index;
3308 HeapTuple indexTuple;
3309 Form_pg_index indexForm;
3311 /* Assert that current xact hasn't done any transactional updates */
3312 Assert(GetTopTransactionIdIfAny() == InvalidTransactionId);
3314 /* Open pg_index and fetch a writable copy of the index's tuple */
3315 pg_index = table_open(IndexRelationId, RowExclusiveLock);
3317 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3318 ObjectIdGetDatum(indexId));
3319 if (!HeapTupleIsValid(indexTuple))
3320 elog(ERROR, "cache lookup failed for index %u", indexId);
3321 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3323 /* Perform the requested state change on the copy */
3324 switch (action)
3326 case INDEX_CREATE_SET_READY:
3327 /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
3328 Assert(indexForm->indislive);
3329 Assert(!indexForm->indisready);
3330 Assert(!indexForm->indisvalid);
3331 indexForm->indisready = true;
3332 break;
3333 case INDEX_CREATE_SET_VALID:
3334 /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
3335 Assert(indexForm->indislive);
3336 Assert(indexForm->indisready);
3337 Assert(!indexForm->indisvalid);
3338 indexForm->indisvalid = true;
3339 break;
3340 case INDEX_DROP_CLEAR_VALID:
3343 * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
3345 * If indisready == true we leave it set so the index still gets
3346 * maintained by active transactions. We only need to ensure that
3347 * indisvalid is false. (We don't assert that either is initially
3348 * true, though, since we want to be able to retry a DROP INDEX
3349 * CONCURRENTLY that failed partway through.)
3351 * Note: the CLUSTER logic assumes that indisclustered cannot be
3352 * set on any invalid index, so clear that flag too.
3354 indexForm->indisvalid = false;
3355 indexForm->indisclustered = false;
3356 break;
3357 case INDEX_DROP_SET_DEAD:
3360 * Clear indisready/indislive during DROP INDEX CONCURRENTLY
3362 * We clear both indisready and indislive, because we not only
3363 * want to stop updates, we want to prevent sessions from touching
3364 * the index at all.
3366 Assert(!indexForm->indisvalid);
3367 indexForm->indisready = false;
3368 indexForm->indislive = false;
3369 break;
3372 /* ... and write it back in-place */
3373 heap_inplace_update(pg_index, indexTuple);
3375 table_close(pg_index, RowExclusiveLock);
3380 * IndexGetRelation: given an index's relation OID, get the OID of the
3381 * relation it is an index on. Uses the system cache.
3384 IndexGetRelation(Oid indexId, bool missing_ok)
3386 HeapTuple tuple;
3387 Form_pg_index index;
3388 Oid result;
3390 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
3391 if (!HeapTupleIsValid(tuple))
3393 if (missing_ok)
3394 return InvalidOid;
3395 elog(ERROR, "cache lookup failed for index %u", indexId);
3397 index = (Form_pg_index) GETSTRUCT(tuple);
3398 Assert(index->indexrelid == indexId);
3400 result = index->indrelid;
3401 ReleaseSysCache(tuple);
3402 return result;
3406 * reindex_index - This routine is used to recreate a single index
3408 void
3409 reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
3410 int options)
3412 Relation iRel,
3413 heapRelation;
3414 Oid heapId;
3415 IndexInfo *indexInfo;
3416 volatile bool skipped_constraint = false;
3417 PGRUsage ru0;
3418 bool progress = (options & REINDEXOPT_REPORT_PROGRESS) != 0;
3420 pg_rusage_init(&ru0);
3423 * Open and lock the parent heap relation. ShareLock is sufficient since
3424 * we only need to be sure no schema or data changes are going on.
3426 heapId = IndexGetRelation(indexId, false);
3427 heapRelation = table_open(heapId, ShareLock);
3429 if (progress)
3431 pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
3432 heapId);
3433 pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND,
3434 PROGRESS_CREATEIDX_COMMAND_REINDEX);
3435 pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID,
3436 indexId);
3440 * Open the target index relation and get an exclusive lock on it, to
3441 * ensure that no one else is touching this particular index.
3443 iRel = index_open(indexId, AccessExclusiveLock);
3445 if (progress)
3446 pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
3447 iRel->rd_rel->relam);
3450 * The case of reindexing partitioned tables and indexes is handled
3451 * differently by upper layers, so this case shouldn't arise.
3453 if (iRel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
3454 elog(ERROR, "unsupported relation kind for index \"%s\"",
3455 RelationGetRelationName(iRel));
3458 * Don't allow reindex on temp tables of other backends ... their local
3459 * buffer manager is not going to cope.
3461 if (RELATION_IS_OTHER_TEMP(iRel))
3462 ereport(ERROR,
3463 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3464 errmsg("cannot reindex temporary tables of other sessions")));
3467 * Don't allow reindex of an invalid index on TOAST table. This is a
3468 * leftover from a failed REINDEX CONCURRENTLY, and if rebuilt it would
3469 * not be possible to drop it anymore.
3471 if (IsToastNamespace(RelationGetNamespace(iRel)) &&
3472 !get_index_isvalid(indexId))
3473 ereport(ERROR,
3474 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3475 errmsg("cannot reindex invalid index on TOAST table")));
3478 * Also check for active uses of the index in the current transaction; we
3479 * don't want to reindex underneath an open indexscan.
3481 CheckTableNotInUse(iRel, "REINDEX INDEX");
3484 * All predicate locks on the index are about to be made invalid. Promote
3485 * them to relation locks on the heap.
3487 TransferPredicateLocksToHeapRelation(iRel);
3489 /* Fetch info needed for index_build */
3490 indexInfo = BuildIndexInfo(iRel);
3492 /* If requested, skip checking uniqueness/exclusion constraints */
3493 if (skip_constraint_checks)
3495 if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
3496 skipped_constraint = true;
3497 indexInfo->ii_Unique = false;
3498 indexInfo->ii_ExclusionOps = NULL;
3499 indexInfo->ii_ExclusionProcs = NULL;
3500 indexInfo->ii_ExclusionStrats = NULL;
3503 /* Suppress use of the target index while rebuilding it */
3504 SetReindexProcessing(heapId, indexId);
3506 /* Create a new physical relation for the index */
3507 RelationSetNewRelfilenode(iRel, persistence);
3509 /* Initialize the index and rebuild */
3510 /* Note: we do not need to re-establish pkey setting */
3511 index_build(heapRelation, iRel, indexInfo, true, true);
3513 /* Re-allow use of target index */
3514 ResetReindexProcessing();
3517 * If the index is marked invalid/not-ready/dead (ie, it's from a failed
3518 * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
3519 * and we didn't skip a uniqueness check, we can now mark it valid. This
3520 * allows REINDEX to be used to clean up in such cases.
3522 * We can also reset indcheckxmin, because we have now done a
3523 * non-concurrent index build, *except* in the case where index_build
3524 * found some still-broken HOT chains. If it did, and we don't have to
3525 * change any of the other flags, we just leave indcheckxmin alone (note
3526 * that index_build won't have changed it, because this is a reindex).
3527 * This is okay and desirable because not updating the tuple leaves the
3528 * index's usability horizon (recorded as the tuple's xmin value) the same
3529 * as it was.
3531 * But, if the index was invalid/not-ready/dead and there were broken HOT
3532 * chains, we had better force indcheckxmin true, because the normal
3533 * argument that the HOT chains couldn't conflict with the index is
3534 * suspect for an invalid index. (A conflict is definitely possible if
3535 * the index was dead. It probably shouldn't happen otherwise, but let's
3536 * be conservative.) In this case advancing the usability horizon is
3537 * appropriate.
3539 * Another reason for avoiding unnecessary updates here is that while
3540 * reindexing pg_index itself, we must not try to update tuples in it.
3541 * pg_index's indexes should always have these flags in their clean state,
3542 * so that won't happen.
3544 * If early pruning/vacuuming is enabled for the heap relation, the
3545 * usability horizon must be advanced to the current transaction on every
3546 * build or rebuild. pg_index is OK in this regard because catalog tables
3547 * are not subject to early cleanup.
3549 if (!skipped_constraint)
3551 Relation pg_index;
3552 HeapTuple indexTuple;
3553 Form_pg_index indexForm;
3554 bool index_bad;
3555 bool early_pruning_enabled = EarlyPruningEnabled(heapRelation);
3557 pg_index = table_open(IndexRelationId, RowExclusiveLock);
3559 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3560 ObjectIdGetDatum(indexId));
3561 if (!HeapTupleIsValid(indexTuple))
3562 elog(ERROR, "cache lookup failed for index %u", indexId);
3563 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3565 index_bad = (!indexForm->indisvalid ||
3566 !indexForm->indisready ||
3567 !indexForm->indislive);
3568 if (index_bad ||
3569 (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
3570 early_pruning_enabled)
3572 if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
3573 indexForm->indcheckxmin = false;
3574 else if (index_bad || early_pruning_enabled)
3575 indexForm->indcheckxmin = true;
3576 indexForm->indisvalid = true;
3577 indexForm->indisready = true;
3578 indexForm->indislive = true;
3579 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3582 * Invalidate the relcache for the table, so that after we commit
3583 * all sessions will refresh the table's index list. This ensures
3584 * that if anyone misses seeing the pg_index row during this
3585 * update, they'll refresh their list before attempting any update
3586 * on the table.
3588 CacheInvalidateRelcache(heapRelation);
3591 table_close(pg_index, RowExclusiveLock);
3594 /* Log what we did */
3595 if (options & REINDEXOPT_VERBOSE)
3596 ereport(INFO,
3597 (errmsg("index \"%s\" was reindexed",
3598 get_rel_name(indexId)),
3599 errdetail_internal("%s",
3600 pg_rusage_show(&ru0))));
3602 if (progress)
3603 pgstat_progress_end_command();
3605 /* Close rels, but keep locks */
3606 index_close(iRel, NoLock);
3607 table_close(heapRelation, NoLock);
3611 * reindex_relation - This routine is used to recreate all indexes
3612 * of a relation (and optionally its toast relation too, if any).
3614 * "flags" is a bitmask that can include any combination of these bits:
3616 * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
3618 * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
3619 * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
3620 * indexes are inconsistent with it. This makes things tricky if the relation
3621 * is a system catalog that we might consult during the reindexing. To deal
3622 * with that case, we mark all of the indexes as pending rebuild so that they
3623 * won't be trusted until rebuilt. The caller is required to call us *without*
3624 * having made the rebuilt table visible by doing CommandCounterIncrement;
3625 * we'll do CCI after having collected the index list. (This way we can still
3626 * use catalog indexes while collecting the list.)
3628 * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
3629 * constraint conditions, else don't. To avoid deadlocks, VACUUM FULL or
3630 * CLUSTER on a system catalog must omit this flag. REINDEX should be used to
3631 * rebuild an index if constraint inconsistency is suspected. For optimal
3632 * performance, other callers should include the flag only after transforming
3633 * the data in a manner that risks a change in constraint validity.
3635 * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
3636 * rebuilt indexes to unlogged.
3638 * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
3639 * rebuilt indexes to permanent.
3641 * Returns true if any indexes were rebuilt (including toast table's index
3642 * when relevant). Note that a CommandCounterIncrement will occur after each
3643 * index rebuild.
3645 bool
3646 reindex_relation(Oid relid, int flags, int options)
3648 Relation rel;
3649 Oid toast_relid;
3650 List *indexIds;
3651 char persistence;
3652 bool result;
3653 ListCell *indexId;
3654 int i;
3657 * Open and lock the relation. ShareLock is sufficient since we only need
3658 * to prevent schema and data changes in it. The lock level used here
3659 * should match ReindexTable().
3661 rel = table_open(relid, ShareLock);
3664 * This may be useful when implemented someday; but that day is not today.
3665 * For now, avoid erroring out when called in a multi-table context
3666 * (REINDEX SCHEMA) and happen to come across a partitioned table. The
3667 * partitions may be reindexed on their own anyway.
3669 if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
3671 ereport(WARNING,
3672 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3673 errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"",
3674 RelationGetRelationName(rel))));
3675 table_close(rel, ShareLock);
3676 return false;
3679 toast_relid = rel->rd_rel->reltoastrelid;
3682 * Get the list of index OIDs for this relation. (We trust to the
3683 * relcache to get this with a sequential scan if ignoring system
3684 * indexes.)
3686 indexIds = RelationGetIndexList(rel);
3688 if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
3690 /* Suppress use of all the indexes until they are rebuilt */
3691 SetReindexPending(indexIds);
3694 * Make the new heap contents visible --- now things might be
3695 * inconsistent!
3697 CommandCounterIncrement();
3701 * Compute persistence of indexes: same as that of owning rel, unless
3702 * caller specified otherwise.
3704 if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
3705 persistence = RELPERSISTENCE_UNLOGGED;
3706 else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
3707 persistence = RELPERSISTENCE_PERMANENT;
3708 else
3709 persistence = rel->rd_rel->relpersistence;
3711 /* Reindex all the indexes. */
3712 i = 1;
3713 foreach(indexId, indexIds)
3715 Oid indexOid = lfirst_oid(indexId);
3716 Oid indexNamespaceId = get_rel_namespace(indexOid);
3719 * Skip any invalid indexes on a TOAST table. These can only be
3720 * duplicate leftovers from a failed REINDEX CONCURRENTLY, and if
3721 * rebuilt it would not be possible to drop them anymore.
3723 if (IsToastNamespace(indexNamespaceId) &&
3724 !get_index_isvalid(indexOid))
3726 ereport(WARNING,
3727 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3728 errmsg("cannot reindex invalid index \"%s.%s\" on TOAST table, skipping",
3729 get_namespace_name(indexNamespaceId),
3730 get_rel_name(indexOid))));
3731 continue;
3734 reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
3735 persistence, options);
3737 CommandCounterIncrement();
3739 /* Index should no longer be in the pending list */
3740 Assert(!ReindexIsProcessingIndex(indexOid));
3742 /* Set index rebuild count */
3743 pgstat_progress_update_param(PROGRESS_CLUSTER_INDEX_REBUILD_COUNT,
3745 i++;
3749 * Close rel, but continue to hold the lock.
3751 table_close(rel, NoLock);
3753 result = (indexIds != NIL);
3756 * If the relation has a secondary toast rel, reindex that too while we
3757 * still hold the lock on the main table.
3759 if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
3760 result |= reindex_relation(toast_relid, flags, options);
3762 return result;
3766 /* ----------------------------------------------------------------
3767 * System index reindexing support
3769 * When we are busy reindexing a system index, this code provides support
3770 * for preventing catalog lookups from using that index. We also make use
3771 * of this to catch attempted uses of user indexes during reindexing of
3772 * those indexes. This information is propagated to parallel workers;
3773 * attempting to change it during a parallel operation is not permitted.
3774 * ----------------------------------------------------------------
3777 static Oid currentlyReindexedHeap = InvalidOid;
3778 static Oid currentlyReindexedIndex = InvalidOid;
3779 static List *pendingReindexedIndexes = NIL;
3780 static int reindexingNestLevel = 0;
3783 * ReindexIsProcessingHeap
3784 * True if heap specified by OID is currently being reindexed.
3786 bool
3787 ReindexIsProcessingHeap(Oid heapOid)
3789 return heapOid == currentlyReindexedHeap;
3793 * ReindexIsCurrentlyProcessingIndex
3794 * True if index specified by OID is currently being reindexed.
3796 static bool
3797 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
3799 return indexOid == currentlyReindexedIndex;
3803 * ReindexIsProcessingIndex
3804 * True if index specified by OID is currently being reindexed,
3805 * or should be treated as invalid because it is awaiting reindex.
3807 bool
3808 ReindexIsProcessingIndex(Oid indexOid)
3810 return indexOid == currentlyReindexedIndex ||
3811 list_member_oid(pendingReindexedIndexes, indexOid);
3815 * SetReindexProcessing
3816 * Set flag that specified heap/index are being reindexed.
3818 static void
3819 SetReindexProcessing(Oid heapOid, Oid indexOid)
3821 Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
3822 /* Reindexing is not re-entrant. */
3823 if (OidIsValid(currentlyReindexedHeap))
3824 elog(ERROR, "cannot reindex while reindexing");
3825 currentlyReindexedHeap = heapOid;
3826 currentlyReindexedIndex = indexOid;
3827 /* Index is no longer "pending" reindex. */
3828 RemoveReindexPending(indexOid);
3829 /* This may have been set already, but in case it isn't, do so now. */
3830 reindexingNestLevel = GetCurrentTransactionNestLevel();
3834 * ResetReindexProcessing
3835 * Unset reindexing status.
3837 static void
3838 ResetReindexProcessing(void)
3840 currentlyReindexedHeap = InvalidOid;
3841 currentlyReindexedIndex = InvalidOid;
3842 /* reindexingNestLevel remains set till end of (sub)transaction */
3846 * SetReindexPending
3847 * Mark the given indexes as pending reindex.
3849 * NB: we assume that the current memory context stays valid throughout.
3851 static void
3852 SetReindexPending(List *indexes)
3854 /* Reindexing is not re-entrant. */
3855 if (pendingReindexedIndexes)
3856 elog(ERROR, "cannot reindex while reindexing");
3857 if (IsInParallelMode())
3858 elog(ERROR, "cannot modify reindex state during a parallel operation");
3859 pendingReindexedIndexes = list_copy(indexes);
3860 reindexingNestLevel = GetCurrentTransactionNestLevel();
3864 * RemoveReindexPending
3865 * Remove the given index from the pending list.
3867 static void
3868 RemoveReindexPending(Oid indexOid)
3870 if (IsInParallelMode())
3871 elog(ERROR, "cannot modify reindex state during a parallel operation");
3872 pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
3873 indexOid);
3877 * ResetReindexState
3878 * Clear all reindexing state during (sub)transaction abort.
3880 void
3881 ResetReindexState(int nestLevel)
3884 * Because reindexing is not re-entrant, we don't need to cope with nested
3885 * reindexing states. We just need to avoid messing up the outer-level
3886 * state in case a subtransaction fails within a REINDEX. So checking the
3887 * current nest level against that of the reindex operation is sufficient.
3889 if (reindexingNestLevel >= nestLevel)
3891 currentlyReindexedHeap = InvalidOid;
3892 currentlyReindexedIndex = InvalidOid;
3895 * We needn't try to release the contents of pendingReindexedIndexes;
3896 * that list should be in a transaction-lifespan context, so it will
3897 * go away automatically.
3899 pendingReindexedIndexes = NIL;
3901 reindexingNestLevel = 0;
3906 * EstimateReindexStateSpace
3907 * Estimate space needed to pass reindex state to parallel workers.
3909 Size
3910 EstimateReindexStateSpace(void)
3912 return offsetof(SerializedReindexState, pendingReindexedIndexes)
3913 + mul_size(sizeof(Oid), list_length(pendingReindexedIndexes));
3917 * SerializeReindexState
3918 * Serialize reindex state for parallel workers.
3920 void
3921 SerializeReindexState(Size maxsize, char *start_address)
3923 SerializedReindexState *sistate = (SerializedReindexState *) start_address;
3924 int c = 0;
3925 ListCell *lc;
3927 sistate->currentlyReindexedHeap = currentlyReindexedHeap;
3928 sistate->currentlyReindexedIndex = currentlyReindexedIndex;
3929 sistate->numPendingReindexedIndexes = list_length(pendingReindexedIndexes);
3930 foreach(lc, pendingReindexedIndexes)
3931 sistate->pendingReindexedIndexes[c++] = lfirst_oid(lc);
3935 * RestoreReindexState
3936 * Restore reindex state in a parallel worker.
3938 void
3939 RestoreReindexState(void *reindexstate)
3941 SerializedReindexState *sistate = (SerializedReindexState *) reindexstate;
3942 int c = 0;
3943 MemoryContext oldcontext;
3945 currentlyReindexedHeap = sistate->currentlyReindexedHeap;
3946 currentlyReindexedIndex = sistate->currentlyReindexedIndex;
3948 Assert(pendingReindexedIndexes == NIL);
3949 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
3950 for (c = 0; c < sistate->numPendingReindexedIndexes; ++c)
3951 pendingReindexedIndexes =
3952 lappend_oid(pendingReindexedIndexes,
3953 sistate->pendingReindexedIndexes[c]);
3954 MemoryContextSwitchTo(oldcontext);
3956 /* Note the worker has its own transaction nesting level */
3957 reindexingNestLevel = GetCurrentTransactionNestLevel();