1 /*-------------------------------------------------------------------------
3 * index.c
4 * code to create and destroy POSTGRES index relations
6 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * $PostgreSQL$
14 * INTERFACE ROUTINES
15 * index_create() - Create a cataloged index relation
16 * index_drop() - Removes index relation from catalogs
17 * BuildIndexInfo() - Prepare to insert index tuples
18 * FormIndexDatum() - Construct datum vector for one index tuple
20 *-------------------------------------------------------------------------
22 #include "postgres.h"
24 #include <unistd.h>
26 #include "access/genam.h"
27 #include "access/heapam.h"
28 #include "access/relscan.h"
29 #include "access/sysattr.h"
30 #include "access/transam.h"
31 #include "access/xact.h"
32 #include "bootstrap/bootstrap.h"
33 #include "catalog/catalog.h"
34 #include "catalog/dependency.h"
35 #include "catalog/heap.h"
36 #include "catalog/index.h"
37 #include "catalog/indexing.h"
38 #include "catalog/namespace.h"
39 #include "catalog/pg_constraint.h"
40 #include "catalog/pg_operator.h"
41 #include "catalog/pg_opclass.h"
42 #include "catalog/pg_tablespace.h"
43 #include "catalog/pg_type.h"
44 #include "catalog/storage.h"
45 #include "commands/tablecmds.h"
46 #include "executor/executor.h"
47 #include "miscadmin.h"
48 #include "nodes/nodeFuncs.h"
49 #include "optimizer/clauses.h"
50 #include "optimizer/var.h"
51 #include "storage/bufmgr.h"
52 #include "storage/lmgr.h"
53 #include "storage/procarray.h"
54 #include "storage/smgr.h"
55 #include "utils/builtins.h"
56 #include "utils/fmgroids.h"
57 #include "utils/inval.h"
58 #include "utils/lsyscache.h"
59 #include "utils/memutils.h"
60 #include "utils/relcache.h"
61 #include "utils/syscache.h"
62 #include "utils/tuplesort.h"
63 #include "utils/snapmgr.h"
64 #include "utils/tqual.h"
67 /* state info for validate_index bulkdelete callback */
68 typedef struct
70 Tuplesortstate *tuplesort; /* for sorting the index TIDs */
71 /* statistics (for debug purposes only): */
72 double htups,
73 itups,
74 tups_inserted;
75 } v_i_state;
77 /* non-export function prototypes */
78 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
79 IndexInfo *indexInfo,
80 Oid accessMethodObjectId,
81 Oid *classObjectId);
82 static void InitializeAttributeOids(Relation indexRelation,
83 int numatts, Oid indexoid);
84 static void AppendAttributeTuples(Relation indexRelation, int numatts);
85 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
86 IndexInfo *indexInfo,
87 Oid *classOids,
88 int16 *coloptions,
89 bool primary,
90 bool isvalid);
91 static void index_update_stats(Relation rel, bool hasindex, bool isprimary,
92 Oid reltoastidxid, double reltuples);
93 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
94 static void validate_index_heapscan(Relation heapRelation,
95 Relation indexRelation,
96 IndexInfo *indexInfo,
97 Snapshot snapshot,
98 v_i_state *state);
99 static Oid IndexGetRelation(Oid indexId);
103 * ConstructTupleDescriptor
105 * Build an index tuple descriptor for a new index
107 static TupleDesc
108 ConstructTupleDescriptor(Relation heapRelation,
109 IndexInfo *indexInfo,
110 Oid accessMethodObjectId,
111 Oid *classObjectId)
113 int numatts = indexInfo->ii_NumIndexAttrs;
114 ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
115 HeapTuple amtuple;
116 Form_pg_am amform;
117 TupleDesc heapTupDesc;
118 TupleDesc indexTupDesc;
119 int natts; /* #atts in heap rel --- for error checks */
120 int i;
122 /* We need access to the index AM's pg_am tuple */
123 amtuple = SearchSysCache(AMOID,
124 ObjectIdGetDatum(accessMethodObjectId),
125 0, 0, 0);
126 if (!HeapTupleIsValid(amtuple))
127 elog(ERROR, "cache lookup failed for access method %u",
128 accessMethodObjectId);
129 amform = (Form_pg_am) GETSTRUCT(amtuple);
131 /* ... and to the table's tuple descriptor */
132 heapTupDesc = RelationGetDescr(heapRelation);
133 natts = RelationGetForm(heapRelation)->relnatts;
136 * allocate the new tuple descriptor
138 indexTupDesc = CreateTemplateTupleDesc(numatts, false);
141 * For simple index columns, we copy the pg_attribute row from the parent
142 * relation and modify it as necessary. For expressions we have to cons
143 * up a pg_attribute row the hard way.
145 for (i = 0; i < numatts; i++)
147 AttrNumber atnum = indexInfo->ii_KeyAttrNumbers[i];
148 Form_pg_attribute to = indexTupDesc->attrs[i];
149 HeapTuple tuple;
150 Form_pg_type typeTup;
151 Form_pg_opclass opclassTup;
152 Oid keyType;
154 if (atnum != 0)
156 /* Simple index column */
157 Form_pg_attribute from;
159 if (atnum < 0)
162 * here we are indexing on a system attribute (-1...-n)
164 from = SystemAttributeDefinition(atnum,
165 heapRelation->rd_rel->relhasoids);
167 else
170 * here we are indexing on a normal attribute (1...n)
172 if (atnum > natts) /* safety check */
173 elog(ERROR, "invalid column number %d", atnum);
174 from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
178 * now that we've determined the "from", let's copy the tuple desc
179 * data...
181 memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);
184 * Fix the stuff that should not be the same as the underlying
185 * attr
187 to->attnum = i + 1;
189 to->attstattarget = -1;
190 to->attcacheoff = -1;
191 to->attnotnull = false;
192 to->atthasdef = false;
193 to->attislocal = true;
194 to->attinhcount = 0;
196 else
198 /* Expressional index */
199 Node *indexkey;
201 MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
203 if (indexpr_item == NULL) /* shouldn't happen */
204 elog(ERROR, "too few entries in indexprs list");
205 indexkey = (Node *) lfirst(indexpr_item);
206 indexpr_item = lnext(indexpr_item);
209 * Make the attribute's name "pg_expression_nnn" (maybe think of
210 * something better later)
212 sprintf(NameStr(to->attname), "pg_expression_%d", i + 1);
215 * Lookup the expression type in pg_type for the type length etc.
217 keyType = exprType(indexkey);
218 tuple = SearchSysCache(TYPEOID,
219 ObjectIdGetDatum(keyType),
220 0, 0, 0);
221 if (!HeapTupleIsValid(tuple))
222 elog(ERROR, "cache lookup failed for type %u", keyType);
223 typeTup = (Form_pg_type) GETSTRUCT(tuple);
226 * Assign some of the attribute's values. Leave the rest as 0.
228 to->attnum = i + 1;
229 to->atttypid = keyType;
230 to->attlen = typeTup->typlen;
231 to->attbyval = typeTup->typbyval;
232 to->attstorage = typeTup->typstorage;
233 to->attalign = typeTup->typalign;
234 to->attstattarget = -1;
235 to->attcacheoff = -1;
236 to->atttypmod = -1;
237 to->attislocal = true;
239 ReleaseSysCache(tuple);
242 * Make sure the expression yields a type that's safe to store in
243 * an index. We need this defense because we have index opclasses
244 * for pseudo-types such as "record", and the actually stored type
245 * had better be safe; eg, a named composite type is okay, an
246 * anonymous record type is not. The test is the same as for
247 * whether a table column is of a safe type (which is why we
248 * needn't check for the non-expression case).
250 CheckAttributeType(NameStr(to->attname), to->atttypid);
254 * We do not yet have the correct relation OID for the index, so just
255 * set it invalid for now. InitializeAttributeOids() will fix it
256 * later.
258 to->attrelid = InvalidOid;
261 * Check the opclass and index AM to see if either provides a keytype
262 * (overriding the attribute type). Opclass takes precedence.
264 tuple = SearchSysCache(CLAOID,
265 ObjectIdGetDatum(classObjectId[i]),
266 0, 0, 0);
267 if (!HeapTupleIsValid(tuple))
268 elog(ERROR, "cache lookup failed for opclass %u",
269 classObjectId[i]);
270 opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
271 if (OidIsValid(opclassTup->opckeytype))
272 keyType = opclassTup->opckeytype;
273 else
274 keyType = amform->amkeytype;
275 ReleaseSysCache(tuple);
277 if (OidIsValid(keyType) && keyType != to->atttypid)
279 /* index value and heap value have different types */
280 tuple = SearchSysCache(TYPEOID,
281 ObjectIdGetDatum(keyType),
282 0, 0, 0);
283 if (!HeapTupleIsValid(tuple))
284 elog(ERROR, "cache lookup failed for type %u", keyType);
285 typeTup = (Form_pg_type) GETSTRUCT(tuple);
287 to->atttypid = keyType;
288 to->atttypmod = -1;
289 to->attlen = typeTup->typlen;
290 to->attbyval = typeTup->typbyval;
291 to->attalign = typeTup->typalign;
292 to->attstorage = typeTup->typstorage;
294 ReleaseSysCache(tuple);
298 ReleaseSysCache(amtuple);
300 return indexTupDesc;
303 /* ----------------------------------------------------------------
304 * InitializeAttributeOids
305 * ----------------------------------------------------------------
307 static void
308 InitializeAttributeOids(Relation indexRelation,
309 int numatts,
310 Oid indexoid)
312 TupleDesc tupleDescriptor;
313 int i;
315 tupleDescriptor = RelationGetDescr(indexRelation);
317 for (i = 0; i < numatts; i += 1)
318 tupleDescriptor->attrs[i]->attrelid = indexoid;
321 /* ----------------------------------------------------------------
322 * AppendAttributeTuples
323 * ----------------------------------------------------------------
325 static void
326 AppendAttributeTuples(Relation indexRelation, int numatts)
328 Relation pg_attribute;
329 CatalogIndexState indstate;
330 TupleDesc indexTupDesc;
331 int i;
334 * open the attribute relation and its indexes
336 pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
338 indstate = CatalogOpenIndexes(pg_attribute);
341 * insert data from new index's tupdesc into pg_attribute
343 indexTupDesc = RelationGetDescr(indexRelation);
345 for (i = 0; i < numatts; i++)
348 * There used to be very grotty code here to set these fields, but I
349 * think it's unnecessary. They should be set already.
351 Assert(indexTupDesc->attrs[i]->attnum == i + 1);
352 Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
354 InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
357 CatalogCloseIndexes(indstate);
359 heap_close(pg_attribute, RowExclusiveLock);
362 /* ----------------------------------------------------------------
363 * UpdateIndexRelation
365 * Construct and insert a new entry in the pg_index catalog
366 * ----------------------------------------------------------------
368 static void
369 UpdateIndexRelation(Oid indexoid,
370 Oid heapoid,
371 IndexInfo *indexInfo,
372 Oid *classOids,
373 int16 *coloptions,
374 bool primary,
375 bool isvalid)
377 int2vector *indkey;
378 oidvector *indclass;
379 int2vector *indoption;
380 Datum exprsDatum;
381 Datum predDatum;
382 Datum values[Natts_pg_index];
383 bool nulls[Natts_pg_index];
384 Relation pg_index;
385 HeapTuple tuple;
386 int i;
389 * Copy the index key, opclass, and indoption info into arrays (should we
390 * make the caller pass them like this to start with?)
392 indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
393 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
394 indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
395 indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
396 indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
399 * Convert the index expressions (if any) to a text datum
401 if (indexInfo->ii_Expressions != NIL)
403 char *exprsString;
405 exprsString = nodeToString(indexInfo->ii_Expressions);
406 exprsDatum = CStringGetTextDatum(exprsString);
407 pfree(exprsString);
409 else
410 exprsDatum = (Datum) 0;
413 * Convert the index predicate (if any) to a text datum. Note we convert
414 * implicit-AND format to normal explicit-AND for storage.
416 if (indexInfo->ii_Predicate != NIL)
418 char *predString;
420 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
421 predDatum = CStringGetTextDatum(predString);
422 pfree(predString);
424 else
425 predDatum = (Datum) 0;
428 * open the system catalog index relation
430 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
433 * Build a pg_index tuple
435 MemSet(nulls, false, sizeof(nulls));
437 values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
438 values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
439 values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
440 values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
441 values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
442 values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
443 values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
444 values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
445 /* we set isvalid and isready the same way */
446 values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
447 values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
448 values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
449 values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
450 values[Anum_pg_index_indexprs - 1] = exprsDatum;
451 if (exprsDatum == (Datum) 0)
452 nulls[Anum_pg_index_indexprs - 1] = true;
453 values[Anum_pg_index_indpred - 1] = predDatum;
454 if (predDatum == (Datum) 0)
455 nulls[Anum_pg_index_indpred - 1] = true;
457 tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
460 * insert the tuple into the pg_index catalog
462 simple_heap_insert(pg_index, tuple);
464 /* update the indexes on pg_index */
465 CatalogUpdateIndexes(pg_index, tuple);
468 * close the relation and free the tuple
470 heap_close(pg_index, RowExclusiveLock);
471 heap_freetuple(tuple);
476 * index_create
478 * heapRelationId: OID of table to build index on
479 * indexRelationName: what it says
480 * indexRelationId: normally, pass InvalidOid to let this routine
481 * generate an OID for the index. During bootstrap this may be
482 * nonzero to specify a preselected OID.
483 * indexInfo: same info executor uses to insert into the index
484 * accessMethodObjectId: OID of index AM to use
485 * tableSpaceId: OID of tablespace to use
486 * classObjectId: array of index opclass OIDs, one per index column
487 * coloptions: array of per-index-column indoption settings
488 * reloptions: AM-specific options
489 * isprimary: index is a PRIMARY KEY
490 * isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint
491 * allow_system_table_mods: allow table to be a system catalog
492 * skip_build: true to skip the index_build() step for the moment; caller
493 * must do it later (typically via reindex_index())
494 * concurrent: if true, do not lock the table against writers. The index
495 * will be marked "invalid" and the caller must take additional steps
496 * to fix it up.
498 * Returns OID of the created index.
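/*
 * Illustrative sketch (editorial annotation, not part of the original file):
 * a caller such as DefineIndex might invoke this routine roughly as
 *
 *		indexOid = index_create(heapOid, "my_index", InvalidOid, indexInfo,
 *								BTREE_AM_OID, InvalidOid, classOids,
 *								coloptions, (Datum) 0,
 *								false, false, false, false, false);
 *
 * where heapOid, classOids, and coloptions are hypothetical variables the
 * caller has prepared, and the five trailing booleans correspond to
 * isprimary, isconstraint, allow_system_table_mods, skip_build, concurrent.
 */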
501 index_create(Oid heapRelationId,
502 const char *indexRelationName,
503 Oid indexRelationId,
504 IndexInfo *indexInfo,
505 Oid accessMethodObjectId,
506 Oid tableSpaceId,
507 Oid *classObjectId,
508 int16 *coloptions,
509 Datum reloptions,
510 bool isprimary,
511 bool isconstraint,
512 bool allow_system_table_mods,
513 bool skip_build,
514 bool concurrent)
516 Relation pg_class;
517 Relation heapRelation;
518 Relation indexRelation;
519 TupleDesc indexTupDesc;
520 bool shared_relation;
521 Oid namespaceId;
522 int i;
524 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
527 * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
528 * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
529 * (but not VACUUM).
531 heapRelation = heap_open(heapRelationId,
532 (concurrent ? ShareUpdateExclusiveLock : ShareLock));
535 * The index will be in the same namespace as its parent table, and is
536 * shared across databases if and only if the parent is.
538 namespaceId = RelationGetNamespace(heapRelation);
539 shared_relation = heapRelation->rd_rel->relisshared;
542 * check parameters
544 if (indexInfo->ii_NumIndexAttrs < 1)
545 elog(ERROR, "must index at least one column");
547 if (!allow_system_table_mods &&
548 IsSystemRelation(heapRelation) &&
549 IsNormalProcessingMode())
550 ereport(ERROR,
551 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
552 errmsg("user-defined indexes on system catalog tables are not supported")));
555 * concurrent index build on a system catalog is unsafe because we tend to
556 * release locks on system catalogs before committing
558 if (concurrent &&
559 IsSystemRelation(heapRelation))
560 ereport(ERROR,
561 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
562 errmsg("concurrent index creation on system catalog tables is not supported")));
565 * We cannot allow indexing a shared relation after initdb (because
566 * there's no way to make the entry in other databases' pg_class).
568 if (shared_relation && !IsBootstrapProcessingMode())
569 ereport(ERROR,
570 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
571 errmsg("shared indexes cannot be created after initdb")));
574 * Validate shared/non-shared tablespace (must check this before doing
575 * GetNewRelFileNode, to prevent Assert therein)
577 if (shared_relation)
579 if (tableSpaceId != GLOBALTABLESPACE_OID)
580 /* elog since this is not a user-facing error */
581 elog(ERROR,
582 "shared relations must be placed in pg_global tablespace");
584 else
586 if (tableSpaceId == GLOBALTABLESPACE_OID)
587 ereport(ERROR,
588 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
589 errmsg("only shared relations can be placed in pg_global tablespace")));
592 if (get_relname_relid(indexRelationName, namespaceId))
593 ereport(ERROR,
594 (errcode(ERRCODE_DUPLICATE_TABLE),
595 errmsg("relation \"%s\" already exists",
596 indexRelationName)));
599 * construct tuple descriptor for index tuples
601 indexTupDesc = ConstructTupleDescriptor(heapRelation,
602 indexInfo,
603 accessMethodObjectId,
604 classObjectId);
607 * Allocate an OID for the index, unless we were told what to use.
609 * The OID will be the relfilenode as well, so make sure it doesn't
610 * collide with either pg_class OIDs or existing physical files.
612 if (!OidIsValid(indexRelationId))
613 indexRelationId = GetNewRelFileNode(tableSpaceId, shared_relation,
614 pg_class);
617 * create the index relation's relcache entry and physical disk file. (If
618 * we fail further down, it's the smgr's responsibility to remove the disk
619 * file again.)
621 indexRelation = heap_create(indexRelationName,
622 namespaceId,
623 tableSpaceId,
624 indexRelationId,
625 indexTupDesc,
626 RELKIND_INDEX,
627 shared_relation,
628 allow_system_table_mods);
630 Assert(indexRelationId == RelationGetRelid(indexRelation));
633 * Obtain exclusive lock on it. Although no other backends can see it
634 * until we commit, this prevents deadlock-risk complaints from lock
635 * manager in cases such as CLUSTER.
637 LockRelation(indexRelation, AccessExclusiveLock);
640 * Fill in fields of the index's pg_class entry that are not set correctly
641 * by heap_create.
643 * XXX should have a cleaner way to create cataloged indexes
645 indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
646 indexRelation->rd_rel->relam = accessMethodObjectId;
647 indexRelation->rd_rel->relkind = RELKIND_INDEX;
648 indexRelation->rd_rel->relhasoids = false;
651 * store index's pg_class entry
653 InsertPgClassTuple(pg_class, indexRelation,
654 RelationGetRelid(indexRelation),
655 reloptions);
657 /* done with pg_class */
658 heap_close(pg_class, RowExclusiveLock);
661 * now update the object id's of all the attribute tuple forms in the
662 * index relation's tuple descriptor
664 InitializeAttributeOids(indexRelation,
665 indexInfo->ii_NumIndexAttrs,
666 indexRelationId);
669 * append ATTRIBUTE tuples for the index
671 AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
673 /* ----------------
674 * update pg_index
675 * (append INDEX tuple)
677 * Note that this stows away a representation of "predicate".
678 * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
679 * ----------------
681 UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
682 classObjectId, coloptions, isprimary, !concurrent);
685 * Register constraint and dependencies for the index.
687 * If the index is from a CONSTRAINT clause, construct a pg_constraint
688 * entry. The index is then linked to the constraint, which in turn is
689 * linked to the table. If it's not a CONSTRAINT, make the dependency
690 * directly on the table.
692 * We don't need a dependency on the namespace, because there'll be an
693 * indirect dependency via our parent table.
695 * During bootstrap we can't register any dependencies, and we don't try
696 * to make a constraint either.
698 if (!IsBootstrapProcessingMode())
700 ObjectAddress myself,
701 referenced;
703 myself.classId = RelationRelationId;
704 myself.objectId = indexRelationId;
705 myself.objectSubId = 0;
707 if (isconstraint)
709 char constraintType;
710 Oid conOid;
712 if (isprimary)
713 constraintType = CONSTRAINT_PRIMARY;
714 else if (indexInfo->ii_Unique)
715 constraintType = CONSTRAINT_UNIQUE;
716 else
718 elog(ERROR, "constraint must be PRIMARY or UNIQUE");
719 constraintType = 0; /* keep compiler quiet */
722 /* Shouldn't have any expressions */
723 if (indexInfo->ii_Expressions)
724 elog(ERROR, "constraints cannot have index expressions");
726 conOid = CreateConstraintEntry(indexRelationName,
727 namespaceId,
728 constraintType,
729 false, /* isDeferrable */
730 false, /* isDeferred */
731 heapRelationId,
732 indexInfo->ii_KeyAttrNumbers,
733 indexInfo->ii_NumIndexAttrs,
734 InvalidOid, /* no domain */
735 InvalidOid, /* no foreign key */
736 NULL,
737 NULL,
738 NULL,
739 NULL,
741 ' ',
742 ' ',
743 ' ',
744 InvalidOid, /* no associated index */
745 NULL, /* no check constraint */
746 NULL,
747 NULL,
748 true, /* islocal */
749 0); /* inhcount */
751 referenced.classId = ConstraintRelationId;
752 referenced.objectId = conOid;
753 referenced.objectSubId = 0;
755 recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
757 else
759 bool have_simple_col = false;
761 /* Create auto dependencies on simply-referenced columns */
762 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
764 if (indexInfo->ii_KeyAttrNumbers[i] != 0)
766 referenced.classId = RelationRelationId;
767 referenced.objectId = heapRelationId;
768 referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
770 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
772 have_simple_col = true;
777 * It's possible for an index to not depend on any columns of the
778 * table at all, in which case we need to give it a dependency on
779 * the table as a whole; else it won't get dropped when the table
780 * is dropped. This edge case is not totally useless; for
781 * example, a unique index on a constant expression can serve to
782 * prevent a table from containing more than one row.
784 if (!have_simple_col &&
785 !contain_vars_of_level((Node *) indexInfo->ii_Expressions, 0) &&
786 !contain_vars_of_level((Node *) indexInfo->ii_Predicate, 0))
788 referenced.classId = RelationRelationId;
789 referenced.objectId = heapRelationId;
790 referenced.objectSubId = 0;
792 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
796 /* Store dependency on operator classes */
797 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
799 referenced.classId = OperatorClassRelationId;
800 referenced.objectId = classObjectId[i];
801 referenced.objectSubId = 0;
803 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
806 /* Store dependencies on anything mentioned in index expressions */
807 if (indexInfo->ii_Expressions)
809 recordDependencyOnSingleRelExpr(&myself,
810 (Node *) indexInfo->ii_Expressions,
811 heapRelationId,
812 DEPENDENCY_NORMAL,
813 DEPENDENCY_AUTO);
816 /* Store dependencies on anything mentioned in predicate */
817 if (indexInfo->ii_Predicate)
819 recordDependencyOnSingleRelExpr(&myself,
820 (Node *) indexInfo->ii_Predicate,
821 heapRelationId,
822 DEPENDENCY_NORMAL,
823 DEPENDENCY_AUTO);
828 * Advance the command counter so that we can see the newly-entered
829 * catalog tuples for the index.
831 CommandCounterIncrement();
834 * In bootstrap mode, we have to fill in the index strategy structure with
835 * information from the catalogs. If we aren't bootstrapping, then the
836 * relcache entry has already been rebuilt thanks to sinval update during
837 * CommandCounterIncrement.
839 if (IsBootstrapProcessingMode())
840 RelationInitIndexAccessInfo(indexRelation);
841 else
842 Assert(indexRelation->rd_indexcxt != NULL);
845 * If this is bootstrap (initdb) time, then we don't actually fill in the
846 * index yet. We'll be creating more indexes and classes later, so we
847 * delay filling them in until just before we're done with bootstrapping.
848 * Similarly, if the caller specified skip_build then filling the index is
849 * delayed till later (ALTER TABLE can save work in some cases with this).
850 * Otherwise, we call the AM routine that constructs the index.
852 if (IsBootstrapProcessingMode())
854 index_register(heapRelationId, indexRelationId, indexInfo);
856 else if (skip_build)
859 * Caller is responsible for filling the index later on. However,
860 * we'd better make sure that the heap relation is correctly marked as
861 * having an index.
863 index_update_stats(heapRelation,
864 true,
865 isprimary,
866 InvalidOid,
867 heapRelation->rd_rel->reltuples);
868 /* Make the above update visible */
869 CommandCounterIncrement();
871 else
873 index_build(heapRelation, indexRelation, indexInfo, isprimary);
877 * Close the heap and index; but we keep the locks that we acquired above
878 * until end of transaction.
880 index_close(indexRelation, NoLock);
881 heap_close(heapRelation, NoLock);
883 return indexRelationId;
887 * index_drop
889 * NOTE: this routine should now only be called through performDeletion(),
890 * else associated dependencies won't be cleaned up.
892 void
893 index_drop(Oid indexId)
895 Oid heapId;
896 Relation userHeapRelation;
897 Relation userIndexRelation;
898 Relation indexRelation;
899 HeapTuple tuple;
900 bool hasexprs;
903 * To drop an index safely, we must grab exclusive lock on its parent
904 * table. Exclusive lock on the index alone is insufficient because
905 * another backend might be about to execute a query on the parent table.
906 * If it relies on a previously cached list of index OIDs, then it could
907 * attempt to access the just-dropped index. We must therefore take a
908 * table lock strong enough to prevent all queries on the table from
909 * proceeding until we commit and send out a shared-cache-inval notice
910 * that will make them update their index lists.
912 heapId = IndexGetRelation(indexId);
913 userHeapRelation = heap_open(heapId, AccessExclusiveLock);
915 userIndexRelation = index_open(indexId, AccessExclusiveLock);
918 * Schedule physical removal of the files
920 RelationDropStorage(userIndexRelation);
923 * Close and flush the index's relcache entry, to ensure relcache doesn't
924 * try to rebuild it while we're deleting catalog entries. We keep the
925 * lock though.
927 index_close(userIndexRelation, NoLock);
929 RelationForgetRelation(indexId);
932 * fix INDEX relation, and check for expressional index
934 indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
936 tuple = SearchSysCache(INDEXRELID,
937 ObjectIdGetDatum(indexId),
938 0, 0, 0);
939 if (!HeapTupleIsValid(tuple))
940 elog(ERROR, "cache lookup failed for index %u", indexId);
942 hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
944 simple_heap_delete(indexRelation, &tuple->t_self);
946 ReleaseSysCache(tuple);
947 heap_close(indexRelation, RowExclusiveLock);
950 * if it has any expression columns, we might have stored statistics about
951 * them.
953 if (hasexprs)
954 RemoveStatistics(indexId, 0);
957 * fix ATTRIBUTE relation
959 DeleteAttributeTuples(indexId);
962 * fix RELATION relation
964 DeleteRelationTuple(indexId);
967 * We are presently too lazy to attempt to compute the new correct value
968 * of relhasindex (the next VACUUM will fix it if necessary). So there is
969 * no need to update the pg_class tuple for the owning relation. But we
970 * must send out a shared-cache-inval notice on the owning relation to
971 * ensure other backends update their relcache lists of indexes.
973 CacheInvalidateRelcache(userHeapRelation);
976 * Close owning rel, but keep lock
978 heap_close(userHeapRelation, NoLock);
981 /* ----------------------------------------------------------------
982 * index_build support
983 * ----------------------------------------------------------------
986 /* ----------------
987 * BuildIndexInfo
988 * Construct an IndexInfo record for an open index
990 * IndexInfo stores the information about the index that's needed by
991 * FormIndexDatum, which is used for both index_build() and later insertion
992 * of individual index tuples. Normally we build an IndexInfo for an index
993 * just once per command, and then use it for (potentially) many tuples.
994 * ----------------
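/*
 * Illustrative usage (editorial annotation, not part of the original file):
 * validate_index() later in this file follows exactly this pattern,
 *
 *		indexInfo = BuildIndexInfo(indexRelation);
 *
 * building the IndexInfo once and then reusing it for every tuple inserted.
 */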
996 IndexInfo *
997 BuildIndexInfo(Relation index)
999 IndexInfo *ii = makeNode(IndexInfo);
1000 Form_pg_index indexStruct = index->rd_index;
1001 int i;
1002 int numKeys;
1004 /* check the number of keys, and copy attr numbers into the IndexInfo */
1005 numKeys = indexStruct->indnatts;
1006 if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1007 elog(ERROR, "invalid indnatts %d for index %u",
1008 numKeys, RelationGetRelid(index));
1009 ii->ii_NumIndexAttrs = numKeys;
1010 for (i = 0; i < numKeys; i++)
1011 ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1013 /* fetch any expressions needed for expressional indexes */
1014 ii->ii_Expressions = RelationGetIndexExpressions(index);
1015 ii->ii_ExpressionsState = NIL;
1017 /* fetch index predicate if any */
1018 ii->ii_Predicate = RelationGetIndexPredicate(index);
1019 ii->ii_PredicateState = NIL;
1021 /* other info */
1022 ii->ii_Unique = indexStruct->indisunique;
1023 ii->ii_ReadyForInserts = indexStruct->indisready;
1025 /* initialize index-build state to default */
1026 ii->ii_Concurrent = false;
1027 ii->ii_BrokenHotChain = false;
1029 return ii;
1032 /* ----------------
1033 * FormIndexDatum
1034 * Construct values[] and isnull[] arrays for a new index tuple.
1036 * indexInfo Info about the index
1037 * slot Heap tuple for which we must prepare an index entry
1038 * estate executor state for evaluating any index expressions
1039 * values Array of index Datums (output area)
1040 * isnull Array of is-null indicators (output area)
1042 * When there are no index expressions, estate may be NULL. Otherwise it
1043 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
1044 * context must point to the heap tuple passed in.
1046 * Notice we don't actually call index_form_tuple() here; we just prepare
1047 * its input arrays values[] and isnull[]. This is because the index AM
1048 * may wish to alter the data before storage.
1049 * ----------------
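/*
 * Illustrative usage sketch (editorial annotation, not part of the original
 * file): IndexBuildHeapScan() below follows essentially this pattern, with
 * the output areas declared as Datum values[INDEX_MAX_KEYS] and
 * bool isnull[INDEX_MAX_KEYS]:
 *
 *		ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
 *		FormIndexDatum(indexInfo, slot, estate, values, isnull);
 *
 * after which values[]/isnull[] are handed to the index AM (for example via
 * its build callback or index_insert).
 */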
1051 void
1052 FormIndexDatum(IndexInfo *indexInfo,
1053 TupleTableSlot *slot,
1054 EState *estate,
1055 Datum *values,
1056 bool *isnull)
1058 ListCell *indexpr_item;
1059 int i;
1061 if (indexInfo->ii_Expressions != NIL &&
1062 indexInfo->ii_ExpressionsState == NIL)
1064 /* First time through, set up expression evaluation state */
1065 indexInfo->ii_ExpressionsState = (List *)
1066 ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
1067 estate);
1068 /* Check caller has set up context correctly */
1069 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1071 indexpr_item = list_head(indexInfo->ii_ExpressionsState);
1073 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1075 int keycol = indexInfo->ii_KeyAttrNumbers[i];
1076 Datum iDatum;
1077 bool isNull;
1079 if (keycol != 0)
1082 * Plain index column; get the value we need directly from the
1083 * heap tuple.
1085 iDatum = slot_getattr(slot, keycol, &isNull);
1087 else
1090 * Index expression --- need to evaluate it.
1092 if (indexpr_item == NULL)
1093 elog(ERROR, "wrong number of index expressions");
1094 iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1095 GetPerTupleExprContext(estate),
1096 &isNull,
1097 NULL);
1098 indexpr_item = lnext(indexpr_item);
1100 values[i] = iDatum;
1101 isnull[i] = isNull;
1104 if (indexpr_item != NULL)
1105 elog(ERROR, "wrong number of index expressions");
1110 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
1112 * This routine updates the pg_class row of either an index or its parent
1113 * relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed
1114 * to ensure we can do all the necessary work in just one update.
1116 * hasindex: set relhasindex to this value
1117 * isprimary: if true, set relhaspkey true; else no change
1118 * reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
1119 * else no change
1120 * reltuples: set reltuples to this value
1122 * relpages is also updated (using RelationGetNumberOfBlocks()).
1124 * NOTE: an important side-effect of this operation is that an SI invalidation
1125 * message is sent out to all backends --- including me --- causing relcache
1126 * entries to be flushed or updated with the new data. This must happen even
1127 * if we find that no change is needed in the pg_class row. When updating
1128 * a heap entry, this ensures that other backends find out about the new
1129 * index. When updating an index, it's important because some index AMs
1130 * expect a relcache flush to occur after REINDEX.
1132 static void
1133 index_update_stats(Relation rel, bool hasindex, bool isprimary,
1134 Oid reltoastidxid, double reltuples)
1136 BlockNumber relpages = RelationGetNumberOfBlocks(rel);
1137 Oid relid = RelationGetRelid(rel);
1138 Relation pg_class;
1139 HeapTuple tuple;
1140 Form_pg_class rd_rel;
1141 bool dirty;
1144 * We always update the pg_class row using a non-transactional,
1145 * overwrite-in-place update. There are several reasons for this:
1147 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
1149 * 2. We could be reindexing pg_class itself, in which case we can't move
1150 * its pg_class row because CatalogUpdateIndexes might not know about all
1151 * the indexes yet (see reindex_relation).
1153 * 3. Because we execute CREATE INDEX with just share lock on the parent
1154 * rel (to allow concurrent index creations), an ordinary update could
1155 * suffer a tuple-concurrently-updated failure against another CREATE
1156 * INDEX committing at about the same time. We can avoid that by having
1157 * them both do nontransactional updates (we assume they will both be
1158 * trying to change the pg_class row to the same thing, so it doesn't
1159 * matter which goes first).
1161 * 4. Even with just a single CREATE INDEX, there's a risk factor because
1162 * someone else might be trying to open the rel while we commit, and this
1163 * creates a race condition as to whether he will see both or neither of
1164 * the pg_class row versions as valid. Again, a non-transactional update
1165 * avoids the risk. It is indeterminate which state of the row the other
1166 * process will see, but it doesn't matter (if he's only taking
1167 * AccessShareLock, then it's not critical that he see relhasindex true).
1169 * It is safe to use a non-transactional update even though our
1170 * transaction could still fail before committing. Setting relhasindex
1171 * true is safe even if there are no indexes (VACUUM will eventually fix
1172 * it), and of course the relpages and reltuples counts are correct (or at
1173 * least more so than the old values) regardless.
1176 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1179 * Make a copy of the tuple to update. Normally we use the syscache, but
1180 * we can't rely on that during bootstrap or while reindexing pg_class
1181 * itself.
1183 if (IsBootstrapProcessingMode() ||
1184 ReindexIsProcessingHeap(RelationRelationId))
1186 /* don't assume syscache will work */
1187 HeapScanDesc pg_class_scan;
1188 ScanKeyData key[1];
1190 ScanKeyInit(&key[0],
1191 ObjectIdAttributeNumber,
1192 BTEqualStrategyNumber, F_OIDEQ,
1193 ObjectIdGetDatum(relid));
1195 pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
1196 tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
1197 tuple = heap_copytuple(tuple);
1198 heap_endscan(pg_class_scan);
1200 else
1202 /* normal case, use syscache */
1203 tuple = SearchSysCacheCopy(RELOID,
1204 ObjectIdGetDatum(relid),
1205 0, 0, 0);
1208 if (!HeapTupleIsValid(tuple))
1209 elog(ERROR, "could not find tuple for relation %u", relid);
1210 rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1212 /* Apply required updates, if any, to copied tuple */
1214 dirty = false;
1215 if (rd_rel->relhasindex != hasindex)
1217 rd_rel->relhasindex = hasindex;
1218 dirty = true;
1220 if (isprimary)
1222 if (!rd_rel->relhaspkey)
1224 rd_rel->relhaspkey = true;
1225 dirty = true;
1228 if (OidIsValid(reltoastidxid))
1230 Assert(rd_rel->relkind == RELKIND_TOASTVALUE);
1231 if (rd_rel->reltoastidxid != reltoastidxid)
1233 rd_rel->reltoastidxid = reltoastidxid;
1234 dirty = true;
1237 if (rd_rel->reltuples != (float4) reltuples)
1239 rd_rel->reltuples = (float4) reltuples;
1240 dirty = true;
1242 if (rd_rel->relpages != (int32) relpages)
1244 rd_rel->relpages = (int32) relpages;
1245 dirty = true;
1249 * If anything changed, write out the tuple
1251 if (dirty)
1253 heap_inplace_update(pg_class, tuple);
1254 /* the above sends a cache inval message */
1256 else
1258 /* no need to change tuple, but force relcache inval anyway */
1259 CacheInvalidateRelcacheByTuple(tuple);
1262 heap_freetuple(tuple);
1264 heap_close(pg_class, RowExclusiveLock);
1268 * setNewRelfilenode - assign a new relfilenode value to the relation
1270 * Caller must already hold exclusive lock on the relation.
1272 * The relation is marked with relfrozenxid=freezeXid (InvalidTransactionId
1273 * must be passed for indexes)
1275 void
1276 setNewRelfilenode(Relation relation, TransactionId freezeXid)
1278 Oid newrelfilenode;
1279 RelFileNode newrnode;
1280 Relation pg_class;
1281 HeapTuple tuple;
1282 Form_pg_class rd_rel;
1284 /* Can't change relfilenode for nailed tables (indexes ok though) */
1285 Assert(!relation->rd_isnailed ||
1286 relation->rd_rel->relkind == RELKIND_INDEX);
1287 /* Can't change for shared tables or indexes */
1288 Assert(!relation->rd_rel->relisshared);
1289 /* Indexes must have Invalid frozenxid; other relations must not */
1290 Assert((relation->rd_rel->relkind == RELKIND_INDEX &&
1291 freezeXid == InvalidTransactionId) ||
1292 TransactionIdIsNormal(freezeXid));
1294 /* Allocate a new relfilenode */
1295 newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace,
1296 relation->rd_rel->relisshared,
1297 NULL);
1300 * Find the pg_class tuple for the given relation. This is not used
1301 * during bootstrap, so okay to use heap_update always.
1303 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1305 tuple = SearchSysCacheCopy(RELOID,
1306 ObjectIdGetDatum(RelationGetRelid(relation)),
1307 0, 0, 0);
1308 if (!HeapTupleIsValid(tuple))
1309 elog(ERROR, "could not find tuple for relation %u",
1310 RelationGetRelid(relation));
1311 rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1314 * ... and create storage for corresponding forks in the new relfilenode.
1316 * NOTE: any conflict in relfilenode value will be caught here
1318 newrnode = relation->rd_node;
1319 newrnode.relNode = newrelfilenode;
1322 * Create the main fork, like heap_create() does, and drop the old
1323 * storage.
1325 RelationCreateStorage(newrnode, relation->rd_istemp);
1326 smgrclosenode(newrnode);
1327 RelationDropStorage(relation);
1329 /* update the pg_class row */
1330 rd_rel->relfilenode = newrelfilenode;
1331 rd_rel->relpages = 0; /* it's empty until further notice */
1332 rd_rel->reltuples = 0;
1333 rd_rel->relfrozenxid = freezeXid;
1334 simple_heap_update(pg_class, &tuple->t_self, tuple);
1335 CatalogUpdateIndexes(pg_class, tuple);
1337 heap_freetuple(tuple);
1339 heap_close(pg_class, RowExclusiveLock);
1341 /* Make sure the relfilenode change is visible */
1342 CommandCounterIncrement();
1344 /* Mark the rel as having a new relfilenode in current transaction */
1345 RelationCacheMarkNewRelfilenode(relation);
1350 * index_build - invoke access-method-specific index build procedure
1352 * On entry, the index's catalog entries are valid, and its physical disk
1353 * file has been created but is empty. We call the AM-specific build
1354 * procedure to fill in the index contents. We then update the pg_class
1355 * entries of the index and heap relation as needed, using statistics
1356 * returned by ambuild as well as data passed by the caller.
1358 * Note: when reindexing an existing index, isprimary can be false;
1359 * the index is already properly marked and need not be re-marked.
1361 * Note: before Postgres 8.2, the passed-in heap and index Relations
1362 * were automatically closed by this routine. This is no longer the case.
1363 * The caller opened 'em, and the caller should close 'em.
1365 void
1366 index_build(Relation heapRelation,
1367 Relation indexRelation,
1368 IndexInfo *indexInfo,
1369 bool isprimary)
1371 RegProcedure procedure;
1372 IndexBuildResult *stats;
1373 Oid save_userid;
1374 bool save_secdefcxt;
1377 * sanity checks
1379 Assert(RelationIsValid(indexRelation));
1380 Assert(PointerIsValid(indexRelation->rd_am));
1382 procedure = indexRelation->rd_am->ambuild;
1383 Assert(RegProcedureIsValid(procedure));
1386 * Switch to the table owner's userid, so that any index functions are
1387 * run as that user.
1389 GetUserIdAndContext(&save_userid, &save_secdefcxt);
1390 SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
1393 * Call the access method's build procedure
1395 stats = (IndexBuildResult *)
1396 DatumGetPointer(OidFunctionCall3(procedure,
1397 PointerGetDatum(heapRelation),
1398 PointerGetDatum(indexRelation),
1399 PointerGetDatum(indexInfo)));
1400 Assert(PointerIsValid(stats));
1402 /* Restore userid */
1403 SetUserIdAndContext(save_userid, save_secdefcxt);
1406 * If we found any potentially broken HOT chains, mark the index as not
1407 * being usable until the current transaction is below the event horizon.
1408 * See src/backend/access/heap/README.HOT for discussion.
1410 if (indexInfo->ii_BrokenHotChain)
1412 Oid indexId = RelationGetRelid(indexRelation);
1413 Relation pg_index;
1414 HeapTuple indexTuple;
1415 Form_pg_index indexForm;
1417 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
1419 indexTuple = SearchSysCacheCopy(INDEXRELID,
1420 ObjectIdGetDatum(indexId),
1421 0, 0, 0);
1422 if (!HeapTupleIsValid(indexTuple))
1423 elog(ERROR, "cache lookup failed for index %u", indexId);
1424 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
1426 indexForm->indcheckxmin = true;
1427 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
1428 CatalogUpdateIndexes(pg_index, indexTuple);
1430 heap_freetuple(indexTuple);
1431 heap_close(pg_index, RowExclusiveLock);
1435 * Update heap and index pg_class rows
1437 index_update_stats(heapRelation,
1438 true,
1439 isprimary,
1440 (heapRelation->rd_rel->relkind == RELKIND_TOASTVALUE) ?
1441 RelationGetRelid(indexRelation) : InvalidOid,
1442 stats->heap_tuples);
1444 index_update_stats(indexRelation,
1445 false,
1446 false,
1447 InvalidOid,
1448 stats->index_tuples);
1450 /* Make the updated versions visible */
1451 CommandCounterIncrement();
1456 * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
1458 * This is called back from an access-method-specific index build procedure
1459 * after the AM has done whatever setup it needs. The parent heap relation
1460 * is scanned to find tuples that should be entered into the index. Each
1461 * such tuple is passed to the AM's callback routine, which does the right
1462 * things to add it to the new index. After we return, the AM's index
1463 * build procedure does whatever cleanup is needed; in particular, it should
1464 * close the heap and index relations.
1466 * The total count of heap tuples is returned. This is for updating pg_class
1467 * statistics. (It's annoying not to be able to do that here, but we can't
1468 * do it until after the relation is closed.) Note that the index AM itself
1469 * must keep track of the number of index tuples; we don't do so here because
1470 * the AM might reject some of the tuples for its own reasons, such as being
1471 * unable to store NULLs.
1473 * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
1474 * any potentially broken HOT chains. Currently, we set this if there are
1475 * any RECENTLY_DEAD entries in a HOT chain, without trying very hard to
1476 * detect whether they're really incompatible with the chain tip.
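/*
 * Illustrative sketch (editorial annotation, not part of the original file):
 * an index AM's ambuild routine typically drives this function along the
 * lines of
 *
 *		reltuples = IndexBuildHeapScan(heapRelation, indexRelation, indexInfo,
 *									   true, my_build_callback,
 *									   (void *) &buildstate);
 *
 * where my_build_callback and buildstate are hypothetical names for the AM's
 * per-tuple callback and its private build state.
 */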
1478 double
1479 IndexBuildHeapScan(Relation heapRelation,
1480 Relation indexRelation,
1481 IndexInfo *indexInfo,
1482 bool allow_sync,
1483 IndexBuildCallback callback,
1484 void *callback_state)
1486 HeapScanDesc scan;
1487 HeapTuple heapTuple;
1488 Datum values[INDEX_MAX_KEYS];
1489 bool isnull[INDEX_MAX_KEYS];
1490 double reltuples;
1491 List *predicate;
1492 TupleTableSlot *slot;
1493 EState *estate;
1494 ExprContext *econtext;
1495 Snapshot snapshot;
1496 TransactionId OldestXmin;
1497 BlockNumber root_blkno = InvalidBlockNumber;
1498 OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1501 * sanity checks
1503 Assert(OidIsValid(indexRelation->rd_rel->relam));
1506 * Need an EState for evaluation of index expressions and partial-index
1507 * predicates. Also a slot to hold the current tuple.
1509 estate = CreateExecutorState();
1510 econtext = GetPerTupleExprContext(estate);
1511 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
1513 /* Arrange for econtext's scan tuple to be the tuple under test */
1514 econtext->ecxt_scantuple = slot;
1516 /* Set up execution state for predicate, if any. */
1517 predicate = (List *)
1518 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
1519 estate);
1522 * Prepare for scan of the base relation. In a normal index build, we use
1523 * SnapshotAny because we must retrieve all tuples and do our own time
1524 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1525 * concurrent build, we take a regular MVCC snapshot and index whatever's
1526 * live according to that. During bootstrap we just use SnapshotNow.
1528 if (IsBootstrapProcessingMode())
1530 snapshot = SnapshotNow;
1531 OldestXmin = InvalidTransactionId; /* not used */
1533 else if (indexInfo->ii_Concurrent)
1535 snapshot = RegisterSnapshot(GetTransactionSnapshot());
1536 OldestXmin = InvalidTransactionId; /* not used */
1538 else
1540 snapshot = SnapshotAny;
1541 /* okay to ignore lazy VACUUMs here */
1542 OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
1545 scan = heap_beginscan_strat(heapRelation, /* relation */
1546 snapshot, /* snapshot */
1547 0, /* number of keys */
1548 NULL, /* scan key */
1549 true, /* buffer access strategy OK */
1550 allow_sync); /* syncscan OK? */
1552 reltuples = 0;
1555 * Scan all tuples in the base relation.
1557 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1559 bool tupleIsAlive;
1561 CHECK_FOR_INTERRUPTS();
1564 * When dealing with a HOT-chain of updated tuples, we want to index
1565 * the values of the live tuple (if any), but index it under the TID
1566 * of the chain's root tuple. This approach is necessary to preserve
1567 * the HOT-chain structure in the heap. So we need to be able to find
1568 * the root item offset for every tuple that's in a HOT-chain. When
1569 * first reaching a new page of the relation, call
1570 * heap_get_root_tuples() to build a map of root item offsets on the
1571 * page.
1573 * It might look unsafe to use this information across buffer
1574 * lock/unlock. However, we hold ShareLock on the table so no
1575 * ordinary insert/update/delete should occur; and we hold pin on the
1576 * buffer continuously while visiting the page, so no pruning
1577 * operation can occur either.
1579 * Note the implied assumption that there is no more than one live
1580 * tuple per HOT-chain ...
1582 if (scan->rs_cblock != root_blkno)
1584 Page page = BufferGetPage(scan->rs_cbuf);
1586 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1587 heap_get_root_tuples(page, root_offsets);
1588 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1590 root_blkno = scan->rs_cblock;
1593 if (snapshot == SnapshotAny)
1595 /* do our own time qual check */
1596 bool indexIt;
1598 recheck:
1601 * We could possibly get away with not locking the buffer here,
1602 * since caller should hold ShareLock on the relation, but let's
1603 * be conservative about it. (This remark is still correct even
1604 * with HOT-pruning: our pin on the buffer prevents pruning.)
1606 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1608 switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin,
1609 scan->rs_cbuf))
1611 case HEAPTUPLE_DEAD:
1612 /* Definitely dead, we can ignore it */
1613 indexIt = false;
1614 tupleIsAlive = false;
1615 break;
1616 case HEAPTUPLE_LIVE:
1617 /* Normal case, index and unique-check it */
1618 indexIt = true;
1619 tupleIsAlive = true;
1620 break;
1621 case HEAPTUPLE_RECENTLY_DEAD:
1624 * If tuple is recently deleted then we must index it
1625 * anyway to preserve MVCC semantics. (Pre-existing
1626 * transactions could try to use the index after we finish
1627 * building it, and may need to see such tuples.)
1629 * However, if it was HOT-updated then we must only index
1630 * the live tuple at the end of the HOT-chain. Since this
1631 * breaks semantics for pre-existing snapshots, mark the
1632 * index as unusable for them.
1634 * If we've already decided that the index will be unsafe
1635 * for old snapshots, we may as well stop indexing
1636 * recently-dead tuples, since there's no longer any
1637 * point.
1639 if (HeapTupleIsHotUpdated(heapTuple))
1641 indexIt = false;
1642 /* mark the index as unsafe for old snapshots */
1643 indexInfo->ii_BrokenHotChain = true;
1645 else if (indexInfo->ii_BrokenHotChain)
1646 indexIt = false;
1647 else
1648 indexIt = true;
1649 /* In any case, exclude the tuple from unique-checking */
1650 tupleIsAlive = false;
1651 break;
1652 case HEAPTUPLE_INSERT_IN_PROGRESS:
1655 * Since caller should hold ShareLock or better, we should
1656 * not see any tuples inserted by open transactions ---
1657 * unless it's our own transaction. (Consider INSERT
1658 * followed by CREATE INDEX within a transaction.) An
1659 * exception occurs when reindexing a system catalog,
1660 * because we often release lock on system catalogs before
1661 * committing. In that case we wait for the inserting
1662 * transaction to finish and check again. (We could do
1663 * that on user tables too, but since the case is not
1664 * expected it seems better to throw an error.)
1666 if (!TransactionIdIsCurrentTransactionId(
1667 HeapTupleHeaderGetXmin(heapTuple->t_data)))
1669 if (!IsSystemRelation(heapRelation))
1670 elog(ERROR, "concurrent insert in progress");
1671 else
1674 * Must drop the lock on the buffer before we wait
1676 TransactionId xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1678 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1679 XactLockTableWait(xwait);
1680 goto recheck;
1685 * We must index such tuples, since if the index build
1686 * commits then they're good.
1688 indexIt = true;
1689 tupleIsAlive = true;
1690 break;
1691 case HEAPTUPLE_DELETE_IN_PROGRESS:
1694 * Since caller should hold ShareLock or better, we should
1695 * not see any tuples deleted by open transactions ---
1696 * unless it's our own transaction. (Consider DELETE
1697 * followed by CREATE INDEX within a transaction.) An
1698 * exception occurs when reindexing a system catalog,
1699 * because we often release lock on system catalogs before
1700 * committing. In that case we wait for the deleting
1701 * transaction to finish and check again. (We could do
1702 * that on user tables too, but since the case is not
1703 * expected it seems better to throw an error.)
1705 Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
1706 if (!TransactionIdIsCurrentTransactionId(
1707 HeapTupleHeaderGetXmax(heapTuple->t_data)))
1709 if (!IsSystemRelation(heapRelation))
1710 elog(ERROR, "concurrent delete in progress");
1711 else
1714 * Must drop the lock on the buffer before we wait
1716 TransactionId xwait = HeapTupleHeaderGetXmax(heapTuple->t_data);
1718 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1719 XactLockTableWait(xwait);
1720 goto recheck;
1725 * Otherwise, we have to treat these tuples just like
1726 * RECENTLY_DEAD ones.
1728 if (HeapTupleIsHotUpdated(heapTuple))
1730 indexIt = false;
1731 /* mark the index as unsafe for old snapshots */
1732 indexInfo->ii_BrokenHotChain = true;
1734 else if (indexInfo->ii_BrokenHotChain)
1735 indexIt = false;
1736 else
1737 indexIt = true;
1738 /* In any case, exclude the tuple from unique-checking */
1739 tupleIsAlive = false;
1740 break;
1741 default:
1742 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1743 indexIt = tupleIsAlive = false; /* keep compiler quiet */
1744 break;
1747 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1749 if (!indexIt)
1750 continue;
1752 else
1754 /* heap_getnext did the time qual check */
1755 tupleIsAlive = true;
1758 reltuples += 1;
1760 MemoryContextReset(econtext->ecxt_per_tuple_memory);
1762 /* Set up for predicate or expression evaluation */
1763 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
1766 * In a partial index, discard tuples that don't satisfy the
1767 * predicate.
1769 if (predicate != NIL)
1771 if (!ExecQual(predicate, econtext, false))
1772 continue;
1776 * For the current heap tuple, extract all the attributes we use in
1777 * this index, and note which are null. This also performs evaluation
1778 * of any expressions needed.
1780 FormIndexDatum(indexInfo,
1781 slot,
1782 estate,
1783 values,
1784 isnull);
1787 * You'd think we should go ahead and build the index tuple here, but
1788 * some index AMs want to do further processing on the data first. So
1789 * pass the values[] and isnull[] arrays, instead.
1792 if (HeapTupleIsHeapOnly(heapTuple))
1795 * For a heap-only tuple, pretend its TID is that of the root. See
1796 * src/backend/access/heap/README.HOT for discussion.
1798 HeapTupleData rootTuple;
1799 OffsetNumber offnum;
1801 rootTuple = *heapTuple;
1802 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1804 Assert(OffsetNumberIsValid(root_offsets[offnum - 1]));
1806 ItemPointerSetOffsetNumber(&rootTuple.t_self,
1807 root_offsets[offnum - 1]);
1809 /* Call the AM's callback routine to process the tuple */
1810 callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
1811 callback_state);
1813 else
1815 /* Call the AM's callback routine to process the tuple */
1816 callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
1817 callback_state);
1821 heap_endscan(scan);
1823 /* we can now forget our snapshot, if set */
1824 if (indexInfo->ii_Concurrent)
1825 UnregisterSnapshot(snapshot);
1827 ExecDropSingleTupleTableSlot(slot);
1829 FreeExecutorState(estate);
1831 /* These may have been pointing to the now-gone estate */
1832 indexInfo->ii_ExpressionsState = NIL;
1833 indexInfo->ii_PredicateState = NIL;
1835 return reltuples;
1840 * validate_index - support code for concurrent index builds
1842 * We do a concurrent index build by first inserting the catalog entry for the
1843 * index via index_create(), marking it not indisready and not indisvalid.
1844 * Then we commit our transaction and start a new one, then we wait for all
1845 * transactions that could have been modifying the table to terminate. Now
1846 * we know that any subsequently-started transactions will see the index and
1847 * honor its constraints on HOT updates; so while existing HOT-chains might
1848 * be broken with respect to the index, no currently live tuple will have an
1849 * incompatible HOT update done to it. We now build the index normally via
1850 * index_build(), while holding a weak lock that allows concurrent
1851 * insert/update/delete. Also, we index only tuples that are valid
1852 * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
1853 * build takes care to include recently-dead tuples. This is OK because
1854 * we won't mark the index valid until all transactions that might be able
1855 * to see those tuples are gone. The reason for doing that is to avoid
1856 * bogus unique-index failures due to concurrent UPDATEs (we might see
1857 * different versions of the same row as being valid when we pass over them,
1858 * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
1859 * does not contain any tuples added to the table while we built the index.
1861 * Next, we mark the index "indisready" (but still not "indisvalid") and
1862 * commit the second transaction and start a third. Again we wait for all
1863 * transactions that could have been modifying the table to terminate. Now
1864 * we know that any subsequently-started transactions will see the index and
1865 * insert their new tuples into it. We then take a new reference snapshot
1866 * which is passed to validate_index(). Any tuples that are valid according
1867 * to this snap, but are not in the index, must be added to the index.
1868 * (Any tuples committed live after the snap will be inserted into the
1869 * index by their originating transaction. Any tuples committed dead before
1870 * the snap need not be indexed, because we will wait out all transactions
1871 * that might care about them before we mark the index valid.)
1873 * validate_index() works by first gathering all the TIDs currently in the
1874 * index, using a bulkdelete callback that just stores the TIDs and doesn't
1875 * ever say "delete it". (This should be faster than a plain indexscan;
1876 * also, not all index AMs support full-index indexscan.) Then we sort the
1877 * TIDs, and finally scan the table doing a "merge join" against the TID list
1878 * to see which tuples are missing from the index. Thus we will ensure that
1879 * all tuples valid according to the reference snapshot are in the index.
1881 * Building a unique index this way is tricky: we might try to insert a
1882 * tuple that is already dead or is in process of being deleted, and we
1883 * mustn't have a uniqueness failure against an updated version of the same
1884 * row. We could try to check the tuple to see if it's already dead and tell
1885 * index_insert() not to do the uniqueness check, but that still leaves us
1886 * with a race condition against an in-progress update. To handle that,
1887 * we expect the index AM to recheck liveness of the to-be-inserted tuple
1888 * before it declares a uniqueness error.
1890 * After completing validate_index(), we wait until all transactions that
1891 * were alive at the time of the reference snapshot are gone; this is
1892 * necessary to be sure there are none left with a serializable snapshot
1893 * older than the reference (and hence possibly able to see tuples we did
1894 * not index). Then we mark the index "indisvalid" and commit. Subsequent
1895 * transactions will be able to use it for queries.
1897 * Doing two full table scans is a brute-force strategy. We could try to be
1898 * cleverer, e.g. storing new tuples in a special area of the table (perhaps
1899 * making the table append-only by setting use_fsm). However, that would
1900 * add yet more locking issues.
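/*
 * For orientation, the procedure described above corresponds roughly to the
 * transaction-level sequence that CREATE INDEX CONCURRENTLY drives from
 * DefineIndex (a simplified restatement of the comment above, not
 * additional behavior):
 *
 *		xact 1: index_create() catalog entry, !indisready, !indisvalid; commit
 *		xact 2: wait out lockers; index_build(); set indisready; commit
 *		xact 3: wait out lockers; take reference snapshot; validate_index();
 *				wait out older snapshots; set indisvalid; commit
 */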
1902 void
1903 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
1905 Relation heapRelation,
1906 indexRelation;
1907 IndexInfo *indexInfo;
1908 IndexVacuumInfo ivinfo;
1909 v_i_state state;
1910 Oid save_userid;
1911 bool save_secdefcxt;
1913 /* Open and lock the parent heap relation */
1914 heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1915 /* And the target index relation */
1916 indexRelation = index_open(indexId, RowExclusiveLock);
1919 * Fetch info needed for index_insert. (You might think this should be
1920 * passed in from DefineIndex, but its copy is long gone due to having
1921 * been built in a previous transaction.)
1923 indexInfo = BuildIndexInfo(indexRelation);
1925 /* mark the build as concurrent, just for consistency */
1926 indexInfo->ii_Concurrent = true;
1929 * Switch to the table owner's userid, so that any index functions are
1930 * run as that user.
1932 GetUserIdAndContext(&save_userid, &save_secdefcxt);
1933 SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
1936 * Scan the index and gather up all the TIDs into a tuplesort object.
1938 ivinfo.index = indexRelation;
1939 ivinfo.vacuum_full = false;
1940 ivinfo.analyze_only = false;
1941 ivinfo.message_level = DEBUG2;
1942 ivinfo.num_heap_tuples = -1;
1943 ivinfo.strategy = NULL;
1945 state.tuplesort = tuplesort_begin_datum(TIDOID,
1946 TIDLessOperator, false,
1947 maintenance_work_mem,
1948 false);
1949 state.htups = state.itups = state.tups_inserted = 0;
1951 (void) index_bulk_delete(&ivinfo, NULL,
1952 validate_index_callback, (void *) &state);
1954 /* Execute the sort */
1955 tuplesort_performsort(state.tuplesort);
1958 * Now scan the heap and "merge" it with the index
1960 validate_index_heapscan(heapRelation,
1961 indexRelation,
1962 indexInfo,
1963 snapshot,
1964 &state);
1966 /* Done with tuplesort object */
1967 tuplesort_end(state.tuplesort);
1969 elog(DEBUG2,
1970 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
1971 state.htups, state.itups, state.tups_inserted);
1973 /* Restore userid */
1974 SetUserIdAndContext(save_userid, save_secdefcxt);
1976 /* Close rels, but keep locks */
1977 index_close(indexRelation, NoLock);
1978 heap_close(heapRelation, NoLock);
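/*
 * The userid switch above follows the usual save/switch/restore pattern;
 * in outline (a restatement of the calls in this function, with no new
 * behavior):
 *
 *		GetUserIdAndContext(&save_userid, &save_secdefcxt);
 *		SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
 *		... evaluate any index expressions as the table owner ...
 *		SetUserIdAndContext(save_userid, save_secdefcxt);
 */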
1982 * validate_index_callback - bulkdelete callback to collect the index TIDs
1984 static bool
1985 validate_index_callback(ItemPointer itemptr, void *opaque)
1987 v_i_state *state = (v_i_state *) opaque;
1989 tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
1990 state->itups += 1;
1991 return false; /* never actually delete anything */
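/*
 * In outline, the TID collection and drain pattern used by validate_index
 * and validate_index_heapscan looks like this (a sketch of the calls
 * already made above and below, not a separate code path):
 *
 *		ts = tuplesort_begin_datum(TIDOID, TIDLessOperator, false,
 *								   maintenance_work_mem, false);
 *		(void) index_bulk_delete(&ivinfo, NULL,
 *								 validate_index_callback, (void *) &state);
 *			... which calls tuplesort_putdatum(ts, PointerGetDatum(tid), false)
 *			... once per index entry
 *		tuplesort_performsort(ts);
 *		while (tuplesort_getdatum(ts, true, &val, &isnull))
 *			... consume the TIDs in (block, offset) order ...
 *		tuplesort_end(ts);
 */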
1995 * validate_index_heapscan - second table scan for concurrent index build
1997 * This has much code in common with IndexBuildHeapScan, but it's different
1998 * enough that it seems cleaner to have two routines rather than one.
2000 static void
2001 validate_index_heapscan(Relation heapRelation,
2002 Relation indexRelation,
2003 IndexInfo *indexInfo,
2004 Snapshot snapshot,
2005 v_i_state *state)
2007 HeapScanDesc scan;
2008 HeapTuple heapTuple;
2009 Datum values[INDEX_MAX_KEYS];
2010 bool isnull[INDEX_MAX_KEYS];
2011 List *predicate;
2012 TupleTableSlot *slot;
2013 EState *estate;
2014 ExprContext *econtext;
2015 BlockNumber root_blkno = InvalidBlockNumber;
2016 OffsetNumber root_offsets[MaxHeapTuplesPerPage];
2017 bool in_index[MaxHeapTuplesPerPage];
2019 /* state variables for the merge */
2020 ItemPointer indexcursor = NULL;
2021 bool tuplesort_empty = false;
2024 * sanity checks
2026 Assert(OidIsValid(indexRelation->rd_rel->relam));
2029 * Need an EState for evaluation of index expressions and partial-index
2030 * predicates. Also a slot to hold the current tuple.
2032 estate = CreateExecutorState();
2033 econtext = GetPerTupleExprContext(estate);
2034 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2036 /* Arrange for econtext's scan tuple to be the tuple under test */
2037 econtext->ecxt_scantuple = slot;
2039 /* Set up execution state for predicate, if any. */
2040 predicate = (List *)
2041 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2042 estate);
2045 * Prepare for scan of the base relation. We need just those tuples
2046 * satisfying the passed-in reference snapshot. We must disable syncscan
2047 * here, because it's critical that we read from block zero forward to
2048 * match the sorted TIDs.
2050 scan = heap_beginscan_strat(heapRelation, /* relation */
2051 snapshot, /* snapshot */
2052 0, /* number of keys */
2053 NULL, /* scan key */
2054 true, /* buffer access strategy OK */
2055 false); /* syncscan not OK */
2058 * Scan all tuples matching the snapshot.
2060 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2062 ItemPointer heapcursor = &heapTuple->t_self;
2063 ItemPointerData rootTuple;
2064 OffsetNumber root_offnum;
2066 CHECK_FOR_INTERRUPTS();
2068 state->htups += 1;
2071 * As commented in IndexBuildHeapScan, we should index heap-only
2072 * tuples under the TIDs of their root tuples; so when we advance onto
2073 * a new heap page, build a map of root item offsets on the page.
2075 * This complicates merging against the tuplesort output: we will
2076 * visit the live tuples in order by their offsets, but the root
2077 * offsets that we need to compare against the index contents might be
2078 * ordered differently. So we might have to "look back" within the
2079 * tuplesort output, but only within the current page. We handle that
2080 * by keeping a bool array in_index[] showing all the
2081 * already-passed-over tuplesort output TIDs of the current page. We
2082 * clear that array here, when advancing onto a new heap page. (A worked example of this merge follows the function.)
2084 if (scan->rs_cblock != root_blkno)
2086 Page page = BufferGetPage(scan->rs_cbuf);
2088 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2089 heap_get_root_tuples(page, root_offsets);
2090 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2092 memset(in_index, 0, sizeof(in_index));
2094 root_blkno = scan->rs_cblock;
2097 /* Convert actual tuple TID to root TID */
2098 rootTuple = *heapcursor;
2099 root_offnum = ItemPointerGetOffsetNumber(heapcursor);
2101 if (HeapTupleIsHeapOnly(heapTuple))
2103 root_offnum = root_offsets[root_offnum - 1];
2104 Assert(OffsetNumberIsValid(root_offnum));
2105 ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
2109 * "merge" by skipping through the index tuples until we find or pass
2110 * the current root tuple.
2112 while (!tuplesort_empty &&
2113 (!indexcursor ||
2114 ItemPointerCompare(indexcursor, &rootTuple) < 0))
2116 Datum ts_val;
2117 bool ts_isnull;
2119 if (indexcursor)
2122 * Remember index items seen earlier on the current heap page
2124 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
2125 in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
2126 pfree(indexcursor);
2129 tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
2130 &ts_val, &ts_isnull);
2131 Assert(tuplesort_empty || !ts_isnull);
2132 indexcursor = (ItemPointer) DatumGetPointer(ts_val);
2136 * If the tuplesort has overshot *and* we didn't see a match earlier,
2137 * then this tuple is missing from the index, so insert it.
2139 if ((tuplesort_empty ||
2140 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
2141 !in_index[root_offnum - 1])
2143 MemoryContextReset(econtext->ecxt_per_tuple_memory);
2145 /* Set up for predicate or expression evaluation */
2146 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2149 * In a partial index, discard tuples that don't satisfy the
2150 * predicate.
2152 if (predicate != NIL)
2154 if (!ExecQual(predicate, econtext, false))
2155 continue;
2159 * For the current heap tuple, extract all the attributes we use
2160 * in this index, and note which are null. This also performs
2161 * evaluation of any expressions needed.
2163 FormIndexDatum(indexInfo,
2164 slot,
2165 estate,
2166 values,
2167 isnull);
2170 * You'd think we should go ahead and build the index tuple here,
2171 * but some index AMs want to do further processing on the data
2172 * first. So pass the values[] and isnull[] arrays, instead.
2176 * If the tuple is already committed dead, you might think we
2177 * could suppress uniqueness checking, but this is no longer true
2178 * in the presence of HOT, because the insert is actually a proxy
2179 * for a uniqueness check on the whole HOT-chain. That is, the
2180 * tuple we have here could be dead because it was already
2181 * HOT-updated, and if so the updating transaction will not have
2182 * thought it should insert index entries. The index AM will
2183 * check the whole HOT-chain and correctly detect a conflict if
2184 * there is one.
2187 index_insert(indexRelation,
2188 values,
2189 isnull,
2190 &rootTuple,
2191 heapRelation,
2192 indexInfo->ii_Unique);
2194 state->tups_inserted += 1;
2198 heap_endscan(scan);
2200 ExecDropSingleTupleTableSlot(slot);
2202 FreeExecutorState(estate);
2204 /* These may have been pointing to the now-gone estate */
2205 indexInfo->ii_ExpressionsState = NIL;
2206 indexInfo->ii_PredicateState = NIL;
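/*
 * A worked example of the merge above (all TIDs are hypothetical).  Suppose
 * the sorted index TIDs are (3,1) and (3,2), and block 3 of the heap holds
 * live tuples at offsets 1 and 4 (ordinary tuples) plus offset 5, a
 * heap-only tuple whose root line pointer is offset 2.  The heap scan then
 * yields root TIDs in the order (3,1), (3,4), (3,2):
 *
 *		(3,1)	the index cursor advances to (3,1) and has not overshot,
 *				so the tuple is already indexed; nothing is inserted.
 *		(3,4)	the cursor skips past (3,1) and (3,2), recording both in
 *				in_index[], and the tuplesort is exhausted; in_index[4 - 1]
 *				is false, so the tuple is missing and index_insert() adds
 *				it under TID (3,4).
 *		(3,2)	the tuplesort is empty, but in_index[2 - 1] was set while
 *				skipping, so the tuple is recognized as already indexed and
 *				is not inserted again.
 *
 * This only restates the behavior of the loop above for one concrete page.
 */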
2211 * IndexGetRelation: given an index's relation OID, get the OID of the
2212 * relation it is an index on. Uses the system cache.
2214 static Oid
2215 IndexGetRelation(Oid indexId)
2217 HeapTuple tuple;
2218 Form_pg_index index;
2219 Oid result;
2221 tuple = SearchSysCache(INDEXRELID,
2222 ObjectIdGetDatum(indexId),
2223 0, 0, 0);
2224 if (!HeapTupleIsValid(tuple))
2225 elog(ERROR, "cache lookup failed for index %u", indexId);
2226 index = (Form_pg_index) GETSTRUCT(tuple);
2227 Assert(index->indexrelid == indexId);
2229 result = index->indrelid;
2230 ReleaseSysCache(tuple);
2231 return result;
2235 * reindex_index - This routine is used to recreate a single index
2237 void
2238 reindex_index(Oid indexId)
2240 Relation iRel,
2241 heapRelation,
2242 pg_index;
2243 Oid heapId;
2244 bool inplace;
2245 IndexInfo *indexInfo;
2246 HeapTuple indexTuple;
2247 Form_pg_index indexForm;
2250 * Open and lock the parent heap relation. ShareLock is sufficient since
2251 * we only need to be sure no schema or data changes are going on.
2253 heapId = IndexGetRelation(indexId);
2254 heapRelation = heap_open(heapId, ShareLock);
2257 * Open the target index relation and get an exclusive lock on it, to
2258 * ensure that no one else is touching this particular index.
2260 iRel = index_open(indexId, AccessExclusiveLock);
2263 * Don't allow reindex on temp tables of other backends ... their local
2264 * buffer manager is not going to cope.
2266 if (RELATION_IS_OTHER_TEMP(iRel))
2267 ereport(ERROR,
2268 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2269 errmsg("cannot reindex temporary tables of other sessions")));
2272 * Also check for active uses of the index in the current transaction;
2273 * we don't want to reindex underneath an open indexscan.
2275 CheckTableNotInUse(iRel, "REINDEX INDEX");
2278 * If it's a shared index, we must do inplace processing (because we have
2279 * no way to update relfilenode in other databases). Otherwise we can do
2280 * it the normal transaction-safe way.
2282 * Since inplace processing isn't crash-safe, we only allow it in a
2283 * standalone backend. (In the REINDEX TABLE and REINDEX DATABASE cases,
2284 * the caller should have detected this.)
2286 inplace = iRel->rd_rel->relisshared;
2288 if (inplace && IsUnderPostmaster)
2289 ereport(ERROR,
2290 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2291 errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
2292 RelationGetRelationName(iRel))));
2294 PG_TRY();
2296 /* Suppress use of the target index while rebuilding it */
2297 SetReindexProcessing(heapId, indexId);
2299 /* Fetch info needed for index_build */
2300 indexInfo = BuildIndexInfo(iRel);
2302 if (inplace)
2305 * Truncate the actual file (and discard buffers).
2307 RelationTruncate(iRel, 0);
2309 else
2312 * We'll build a new physical relation for the index.
2314 setNewRelfilenode(iRel, InvalidTransactionId);
2317 /* Initialize the index and rebuild */
2318 /* Note: we do not need to re-establish pkey setting */
2319 index_build(heapRelation, iRel, indexInfo, false);
2321 PG_CATCH();
2323 /* Make sure flag gets cleared on error exit */
2324 ResetReindexProcessing();
2325 PG_RE_THROW();
2327 PG_END_TRY();
2328 ResetReindexProcessing();
2331 * If the index is marked invalid or not ready (ie, it's from a failed
2332 * CREATE INDEX CONCURRENTLY), we can now mark it valid. This allows
2333 * REINDEX to be used to clean up in such cases.
2335 * We can also reset indcheckxmin, because we have now done a
2336 * non-concurrent index build, *except* in the case where index_build
2337 * found some still-broken HOT chains.
2339 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
2341 indexTuple = SearchSysCacheCopy(INDEXRELID,
2342 ObjectIdGetDatum(indexId),
2343 0, 0, 0);
2344 if (!HeapTupleIsValid(indexTuple))
2345 elog(ERROR, "cache lookup failed for index %u", indexId);
2346 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2348 if (!indexForm->indisvalid || !indexForm->indisready ||
2349 (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
2351 indexForm->indisvalid = true;
2352 indexForm->indisready = true;
2353 if (!indexInfo->ii_BrokenHotChain)
2354 indexForm->indcheckxmin = false;
2355 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
2356 CatalogUpdateIndexes(pg_index, indexTuple);
2358 heap_close(pg_index, RowExclusiveLock);
2360 /* Close rels, but keep locks */
2361 index_close(iRel, NoLock);
2362 heap_close(heapRelation, NoLock);
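/*
 * At the SQL level this code is reached by, for example (index name
 * hypothetical):
 *
 *		REINDEX INDEX failed_concurrent_idx;
 *
 * which rebuilds the index non-concurrently and, per the comment above,
 * also clears any invalid/not-ready state left behind by a failed
 * CREATE INDEX CONCURRENTLY.
 */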
2366 * reindex_relation - This routine is used to recreate all indexes
2367 * of a relation (and optionally its toast relation too, if any).
2369 * Returns true if any indexes were rebuilt. Note that a
2370 * CommandCounterIncrement will occur after each index rebuild.
2372 bool
2373 reindex_relation(Oid relid, bool toast_too)
2375 Relation rel;
2376 Oid toast_relid;
2377 bool is_pg_class;
2378 bool result;
2379 List *indexIds,
2380 *doneIndexes;
2381 ListCell *indexId;
2384 * Open and lock the relation. ShareLock is sufficient since we only need
2385 * to prevent schema and data changes in it.
2387 rel = heap_open(relid, ShareLock);
2389 toast_relid = rel->rd_rel->reltoastrelid;
2392 * Get the list of index OIDs for this relation. (We trust to the
2393 * relcache to get this with a sequential scan if ignoring system
2394 * indexes.)
2396 indexIds = RelationGetIndexList(rel);
2399 * reindex_index will attempt to update the pg_class rows for the relation
2400 * and index. If we are processing pg_class itself, we want to make sure
2401 * that the updates do not try to insert index entries into indexes we
2402 * have not processed yet. (When we are trying to recover from corrupted
2403 * indexes, that could easily cause a crash.) We can accomplish this
2404 * because CatalogUpdateIndexes will use the relcache's index list to know
2405 * which indexes to update. We just force the index list to be only the
2406 * stuff we've processed.
2408 * It is okay to not insert entries into the indexes we have not processed
2409 * yet because all of this is transaction-safe. If we fail partway
2410 * through, the updated rows are dead and it doesn't matter whether they
2411 * have index entries. Also, a new pg_class index will be created with an
2412 * entry for its own pg_class row because we do setNewRelfilenode() before
2413 * we do index_build().
2415 * Note that we also clear pg_class's rd_oidindex until the loop is done,
2416 * so that that index can't be accessed either. This means we cannot
2417 * safely generate new relation OIDs while in the loop; shouldn't be a
2418 * problem.
2420 is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
2422 /* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
2423 if (is_pg_class)
2424 (void) RelationGetIndexAttrBitmap(rel);
2426 /* Reindex all the indexes. */
2427 doneIndexes = NIL;
2428 foreach(indexId, indexIds)
2430 Oid indexOid = lfirst_oid(indexId);
2432 if (is_pg_class)
2433 RelationSetIndexList(rel, doneIndexes, InvalidOid);
2435 reindex_index(indexOid);
2437 CommandCounterIncrement();
2439 if (is_pg_class)
2440 doneIndexes = lappend_oid(doneIndexes, indexOid);
2443 if (is_pg_class)
2444 RelationSetIndexList(rel, indexIds, ClassOidIndexId);
2447 * Close rel, but continue to hold the lock.
2449 heap_close(rel, NoLock);
2451 result = (indexIds != NIL);
2454 * If the relation has a secondary toast rel, reindex that too while we
2455 * still hold the lock on the master table.
2457 if (toast_too && OidIsValid(toast_relid))
2458 result |= reindex_relation(toast_relid, false);
2460 return result;
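/*
 * A caller-level sketch (the variable name is hypothetical; real callers
 * include the REINDEX TABLE and REINDEX DATABASE code paths):
 *
 *		if (reindex_relation(tableOid, true))
 *			ereport(NOTICE, (errmsg("some indexes were rebuilt")));
 *
 * Passing toast_too = true makes the function recurse once onto the
 * relation's toast table, as in the call just above.
 */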