Fix oversight in previous error-reporting patch; mustn't pfree path string
[PostgreSQL.git] / src / backend / catalog / index.c
blobe244f92f0b1a0ef64db1510920b4c31e71081d0f
/*-------------------------------------------------------------------------
 *
 * index.c
 *	  code to create and destroy POSTGRES index relations
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *
 * INTERFACE ROUTINES
 *		index_create()			- Create a cataloged index relation
 *		index_drop()			- Removes index relation from catalogs
 *		BuildIndexInfo()		- Prepare to insert index tuples
 *		FormIndexDatum()		- Construct datum vector for one index tuple
 *
 *-------------------------------------------------------------------------
 */
22 #include "postgres.h"
24 #include <unistd.h>
26 #include "access/genam.h"
27 #include "access/heapam.h"
28 #include "access/relscan.h"
29 #include "access/sysattr.h"
30 #include "access/transam.h"
31 #include "access/xact.h"
32 #include "bootstrap/bootstrap.h"
33 #include "catalog/catalog.h"
34 #include "catalog/dependency.h"
35 #include "catalog/heap.h"
36 #include "catalog/index.h"
37 #include "catalog/indexing.h"
38 #include "catalog/namespace.h"
39 #include "catalog/pg_constraint.h"
40 #include "catalog/pg_operator.h"
41 #include "catalog/pg_opclass.h"
42 #include "catalog/pg_tablespace.h"
43 #include "catalog/pg_type.h"
44 #include "commands/tablecmds.h"
45 #include "executor/executor.h"
46 #include "miscadmin.h"
47 #include "nodes/nodeFuncs.h"
48 #include "optimizer/clauses.h"
49 #include "optimizer/var.h"
50 #include "storage/bufmgr.h"
51 #include "storage/lmgr.h"
52 #include "storage/procarray.h"
53 #include "storage/smgr.h"
54 #include "utils/builtins.h"
55 #include "utils/fmgroids.h"
56 #include "utils/inval.h"
57 #include "utils/lsyscache.h"
58 #include "utils/memutils.h"
59 #include "utils/relcache.h"
60 #include "utils/syscache.h"
61 #include "utils/tuplesort.h"
62 #include "utils/snapmgr.h"
63 #include "utils/tqual.h"
66 /* state info for validate_index bulkdelete callback */
67 typedef struct
69 Tuplesortstate *tuplesort; /* for sorting the index TIDs */
70 /* statistics (for debug purposes only): */
71 double htups,
72 itups,
73 tups_inserted;
74 } v_i_state;
76 /* non-export function prototypes */
77 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
78 IndexInfo *indexInfo,
79 Oid accessMethodObjectId,
80 Oid *classObjectId);
81 static void InitializeAttributeOids(Relation indexRelation,
82 int numatts, Oid indexoid);
83 static void AppendAttributeTuples(Relation indexRelation, int numatts);
84 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
85 IndexInfo *indexInfo,
86 Oid *classOids,
87 int16 *coloptions,
88 bool primary,
89 bool isvalid);
90 static void index_update_stats(Relation rel, bool hasindex, bool isprimary,
91 Oid reltoastidxid, double reltuples);
92 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
93 static void validate_index_heapscan(Relation heapRelation,
94 Relation indexRelation,
95 IndexInfo *indexInfo,
96 Snapshot snapshot,
97 v_i_state *state);
98 static Oid IndexGetRelation(Oid indexId);
102 * ConstructTupleDescriptor
104 * Build an index tuple descriptor for a new index
106 static TupleDesc
107 ConstructTupleDescriptor(Relation heapRelation,
108 IndexInfo *indexInfo,
109 Oid accessMethodObjectId,
110 Oid *classObjectId)
112 int numatts = indexInfo->ii_NumIndexAttrs;
113 ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
114 HeapTuple amtuple;
115 Form_pg_am amform;
116 TupleDesc heapTupDesc;
117 TupleDesc indexTupDesc;
118 int natts; /* #atts in heap rel --- for error checks */
119 int i;
121 /* We need access to the index AM's pg_am tuple */
122 amtuple = SearchSysCache(AMOID,
123 ObjectIdGetDatum(accessMethodObjectId),
124 0, 0, 0);
125 if (!HeapTupleIsValid(amtuple))
126 elog(ERROR, "cache lookup failed for access method %u",
127 accessMethodObjectId);
128 amform = (Form_pg_am) GETSTRUCT(amtuple);
130 /* ... and to the table's tuple descriptor */
131 heapTupDesc = RelationGetDescr(heapRelation);
132 natts = RelationGetForm(heapRelation)->relnatts;
135 * allocate the new tuple descriptor
137 indexTupDesc = CreateTemplateTupleDesc(numatts, false);
140 * For simple index columns, we copy the pg_attribute row from the parent
141 * relation and modify it as necessary. For expressions we have to cons
142 * up a pg_attribute row the hard way.
144 for (i = 0; i < numatts; i++)
146 AttrNumber atnum = indexInfo->ii_KeyAttrNumbers[i];
147 Form_pg_attribute to = indexTupDesc->attrs[i];
148 HeapTuple tuple;
149 Form_pg_type typeTup;
150 Form_pg_opclass opclassTup;
151 Oid keyType;
153 if (atnum != 0)
155 /* Simple index column */
156 Form_pg_attribute from;
158 if (atnum < 0)
161 * here we are indexing on a system attribute (-1...-n)
163 from = SystemAttributeDefinition(atnum,
164 heapRelation->rd_rel->relhasoids);
166 else
169 * here we are indexing on a normal attribute (1...n)
171 if (atnum > natts) /* safety check */
172 elog(ERROR, "invalid column number %d", atnum);
173 from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
177 * now that we've determined the "from", let's copy the tuple desc
178 * data...
180 memcpy(to, from, ATTRIBUTE_TUPLE_SIZE);
183 * Fix the stuff that should not be the same as the underlying
184 * attr
186 to->attnum = i + 1;
188 to->attstattarget = -1;
189 to->attcacheoff = -1;
190 to->attnotnull = false;
191 to->atthasdef = false;
192 to->attislocal = true;
193 to->attinhcount = 0;
195 else
197 /* Expressional index */
198 Node *indexkey;
200 MemSet(to, 0, ATTRIBUTE_TUPLE_SIZE);
202 if (indexpr_item == NULL) /* shouldn't happen */
203 elog(ERROR, "too few entries in indexprs list");
204 indexkey = (Node *) lfirst(indexpr_item);
205 indexpr_item = lnext(indexpr_item);
208 * Make the attribute's name "pg_expresssion_nnn" (maybe think of
209 * something better later)
211 sprintf(NameStr(to->attname), "pg_expression_%d", i + 1);
214 * Lookup the expression type in pg_type for the type length etc.
216 keyType = exprType(indexkey);
217 tuple = SearchSysCache(TYPEOID,
218 ObjectIdGetDatum(keyType),
219 0, 0, 0);
220 if (!HeapTupleIsValid(tuple))
221 elog(ERROR, "cache lookup failed for type %u", keyType);
222 typeTup = (Form_pg_type) GETSTRUCT(tuple);
225 * Assign some of the attributes values. Leave the rest as 0.
227 to->attnum = i + 1;
228 to->atttypid = keyType;
229 to->attlen = typeTup->typlen;
230 to->attbyval = typeTup->typbyval;
231 to->attstorage = typeTup->typstorage;
232 to->attalign = typeTup->typalign;
233 to->attstattarget = -1;
234 to->attcacheoff = -1;
235 to->atttypmod = -1;
236 to->attislocal = true;
238 ReleaseSysCache(tuple);
241 * Make sure the expression yields a type that's safe to store in
242 * an index. We need this defense because we have index opclasses
243 * for pseudo-types such as "record", and the actually stored type
244 * had better be safe; eg, a named composite type is okay, an
245 * anonymous record type is not. The test is the same as for
246 * whether a table column is of a safe type (which is why we
247 * needn't check for the non-expression case).
249 CheckAttributeType(NameStr(to->attname), to->atttypid);
253 * We do not yet have the correct relation OID for the index, so just
254 * set it invalid for now. InitializeAttributeOids() will fix it
255 * later.
257 to->attrelid = InvalidOid;
260 * Check the opclass and index AM to see if either provides a keytype
261 * (overriding the attribute type). Opclass takes precedence.
263 tuple = SearchSysCache(CLAOID,
264 ObjectIdGetDatum(classObjectId[i]),
265 0, 0, 0);
266 if (!HeapTupleIsValid(tuple))
267 elog(ERROR, "cache lookup failed for opclass %u",
268 classObjectId[i]);
269 opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
270 if (OidIsValid(opclassTup->opckeytype))
271 keyType = opclassTup->opckeytype;
272 else
273 keyType = amform->amkeytype;
274 ReleaseSysCache(tuple);
276 if (OidIsValid(keyType) && keyType != to->atttypid)
278 /* index value and heap value have different types */
279 tuple = SearchSysCache(TYPEOID,
280 ObjectIdGetDatum(keyType),
281 0, 0, 0);
282 if (!HeapTupleIsValid(tuple))
283 elog(ERROR, "cache lookup failed for type %u", keyType);
284 typeTup = (Form_pg_type) GETSTRUCT(tuple);
286 to->atttypid = keyType;
287 to->atttypmod = -1;
288 to->attlen = typeTup->typlen;
289 to->attbyval = typeTup->typbyval;
290 to->attalign = typeTup->typalign;
291 to->attstorage = typeTup->typstorage;
293 ReleaseSysCache(tuple);
297 ReleaseSysCache(amtuple);
299 return indexTupDesc;
302 /* ----------------------------------------------------------------
303 * InitializeAttributeOids
304 * ----------------------------------------------------------------
306 static void
307 InitializeAttributeOids(Relation indexRelation,
308 int numatts,
309 Oid indexoid)
311 TupleDesc tupleDescriptor;
312 int i;
314 tupleDescriptor = RelationGetDescr(indexRelation);
316 for (i = 0; i < numatts; i += 1)
317 tupleDescriptor->attrs[i]->attrelid = indexoid;
320 /* ----------------------------------------------------------------
321 * AppendAttributeTuples
322 * ----------------------------------------------------------------
324 static void
325 AppendAttributeTuples(Relation indexRelation, int numatts)
327 Relation pg_attribute;
328 CatalogIndexState indstate;
329 TupleDesc indexTupDesc;
330 int i;
333 * open the attribute relation and its indexes
335 pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
337 indstate = CatalogOpenIndexes(pg_attribute);
340 * insert data from new index's tupdesc into pg_attribute
342 indexTupDesc = RelationGetDescr(indexRelation);
344 for (i = 0; i < numatts; i++)
347 * There used to be very grotty code here to set these fields, but I
348 * think it's unnecessary. They should be set already.
350 Assert(indexTupDesc->attrs[i]->attnum == i + 1);
351 Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
353 InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
356 CatalogCloseIndexes(indstate);
358 heap_close(pg_attribute, RowExclusiveLock);
361 /* ----------------------------------------------------------------
362 * UpdateIndexRelation
364 * Construct and insert a new entry in the pg_index catalog
365 * ----------------------------------------------------------------
367 static void
368 UpdateIndexRelation(Oid indexoid,
369 Oid heapoid,
370 IndexInfo *indexInfo,
371 Oid *classOids,
372 int16 *coloptions,
373 bool primary,
374 bool isvalid)
376 int2vector *indkey;
377 oidvector *indclass;
378 int2vector *indoption;
379 Datum exprsDatum;
380 Datum predDatum;
381 Datum values[Natts_pg_index];
382 bool nulls[Natts_pg_index];
383 Relation pg_index;
384 HeapTuple tuple;
385 int i;
388 * Copy the index key, opclass, and indoption info into arrays (should we
389 * make the caller pass them like this to start with?)
391 indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
392 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
393 indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
394 indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
395 indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
398 * Convert the index expressions (if any) to a text datum
400 if (indexInfo->ii_Expressions != NIL)
402 char *exprsString;
404 exprsString = nodeToString(indexInfo->ii_Expressions);
405 exprsDatum = CStringGetTextDatum(exprsString);
406 pfree(exprsString);
408 else
409 exprsDatum = (Datum) 0;
412 * Convert the index predicate (if any) to a text datum. Note we convert
413 * implicit-AND format to normal explicit-AND for storage.
415 if (indexInfo->ii_Predicate != NIL)
417 char *predString;
419 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
420 predDatum = CStringGetTextDatum(predString);
421 pfree(predString);
423 else
424 predDatum = (Datum) 0;
427 * open the system catalog index relation
429 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
432 * Build a pg_index tuple
434 MemSet(nulls, false, sizeof(nulls));
436 values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
437 values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
438 values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
439 values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
440 values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
441 values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
442 values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
443 values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
444 /* we set isvalid and isready the same way */
445 values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
446 values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
447 values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
448 values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
449 values[Anum_pg_index_indexprs - 1] = exprsDatum;
450 if (exprsDatum == (Datum) 0)
451 nulls[Anum_pg_index_indexprs - 1] = true;
452 values[Anum_pg_index_indpred - 1] = predDatum;
453 if (predDatum == (Datum) 0)
454 nulls[Anum_pg_index_indpred - 1] = true;
456 tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
459 * insert the tuple into the pg_index catalog
461 simple_heap_insert(pg_index, tuple);
463 /* update the indexes on pg_index */
464 CatalogUpdateIndexes(pg_index, tuple);
467 * close the relation and free the tuple
469 heap_close(pg_index, RowExclusiveLock);
470 heap_freetuple(tuple);
475 * index_create
477 * heapRelationId: OID of table to build index on
478 * indexRelationName: what it say
479 * indexRelationId: normally, pass InvalidOid to let this routine
480 * generate an OID for the index. During bootstrap this may be
481 * nonzero to specify a preselected OID.
482 * indexInfo: same info executor uses to insert into the index
483 * accessMethodObjectId: OID of index AM to use
484 * tableSpaceId: OID of tablespace to use
485 * classObjectId: array of index opclass OIDs, one per index column
486 * coloptions: array of per-index-column indoption settings
487 * reloptions: AM-specific options
488 * isprimary: index is a PRIMARY KEY
489 * isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint
490 * allow_system_table_mods: allow table to be a system catalog
491 * skip_build: true to skip the index_build() step for the moment; caller
492 * must do it later (typically via reindex_index())
493 * concurrent: if true, do not lock the table against writers. The index
494 * will be marked "invalid" and the caller must take additional steps
495 * to fix it up.
497 * Returns OID of the created index.
500 index_create(Oid heapRelationId,
501 const char *indexRelationName,
502 Oid indexRelationId,
503 IndexInfo *indexInfo,
504 Oid accessMethodObjectId,
505 Oid tableSpaceId,
506 Oid *classObjectId,
507 int16 *coloptions,
508 Datum reloptions,
509 bool isprimary,
510 bool isconstraint,
511 bool allow_system_table_mods,
512 bool skip_build,
513 bool concurrent)
515 Relation pg_class;
516 Relation heapRelation;
517 Relation indexRelation;
518 TupleDesc indexTupDesc;
519 bool shared_relation;
520 Oid namespaceId;
521 int i;
523 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
526 * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
527 * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
528 * (but not VACUUM).
530 heapRelation = heap_open(heapRelationId,
531 (concurrent ? ShareUpdateExclusiveLock : ShareLock));
534 * The index will be in the same namespace as its parent table, and is
535 * shared across databases if and only if the parent is.
537 namespaceId = RelationGetNamespace(heapRelation);
538 shared_relation = heapRelation->rd_rel->relisshared;
541 * check parameters
543 if (indexInfo->ii_NumIndexAttrs < 1)
544 elog(ERROR, "must index at least one column");
546 if (!allow_system_table_mods &&
547 IsSystemRelation(heapRelation) &&
548 IsNormalProcessingMode())
549 ereport(ERROR,
550 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
551 errmsg("user-defined indexes on system catalog tables are not supported")));
554 * concurrent index build on a system catalog is unsafe because we tend to
555 * release locks before committing in catalogs
557 if (concurrent &&
558 IsSystemRelation(heapRelation))
559 ereport(ERROR,
560 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
561 errmsg("concurrent index creation on system catalog tables is not supported")));
564 * We cannot allow indexing a shared relation after initdb (because
565 * there's no way to make the entry in other databases' pg_class).
567 if (shared_relation && !IsBootstrapProcessingMode())
568 ereport(ERROR,
569 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
570 errmsg("shared indexes cannot be created after initdb")));
573 * Validate shared/non-shared tablespace (must check this before doing
574 * GetNewRelFileNode, to prevent Assert therein)
576 if (shared_relation)
578 if (tableSpaceId != GLOBALTABLESPACE_OID)
579 /* elog since this is not a user-facing error */
580 elog(ERROR,
581 "shared relations must be placed in pg_global tablespace");
583 else
585 if (tableSpaceId == GLOBALTABLESPACE_OID)
586 ereport(ERROR,
587 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
588 errmsg("only shared relations can be placed in pg_global tablespace")));
591 if (get_relname_relid(indexRelationName, namespaceId))
592 ereport(ERROR,
593 (errcode(ERRCODE_DUPLICATE_TABLE),
594 errmsg("relation \"%s\" already exists",
595 indexRelationName)));
598 * construct tuple descriptor for index tuples
600 indexTupDesc = ConstructTupleDescriptor(heapRelation,
601 indexInfo,
602 accessMethodObjectId,
603 classObjectId);
606 * Allocate an OID for the index, unless we were told what to use.
608 * The OID will be the relfilenode as well, so make sure it doesn't
609 * collide with either pg_class OIDs or existing physical files.
611 if (!OidIsValid(indexRelationId))
612 indexRelationId = GetNewRelFileNode(tableSpaceId, shared_relation,
613 pg_class);
616 * create the index relation's relcache entry and physical disk file. (If
617 * we fail further down, it's the smgr's responsibility to remove the disk
618 * file again.)
620 indexRelation = heap_create(indexRelationName,
621 namespaceId,
622 tableSpaceId,
623 indexRelationId,
624 indexTupDesc,
625 RELKIND_INDEX,
626 shared_relation,
627 allow_system_table_mods);
629 Assert(indexRelationId == RelationGetRelid(indexRelation));
632 * Obtain exclusive lock on it. Although no other backends can see it
633 * until we commit, this prevents deadlock-risk complaints from lock
634 * manager in cases such as CLUSTER.
636 LockRelation(indexRelation, AccessExclusiveLock);
639 * Fill in fields of the index's pg_class entry that are not set correctly
640 * by heap_create.
642 * XXX should have a cleaner way to create cataloged indexes
644 indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
645 indexRelation->rd_rel->relam = accessMethodObjectId;
646 indexRelation->rd_rel->relkind = RELKIND_INDEX;
647 indexRelation->rd_rel->relhasoids = false;
650 * store index's pg_class entry
652 InsertPgClassTuple(pg_class, indexRelation,
653 RelationGetRelid(indexRelation),
654 reloptions);
656 /* done with pg_class */
657 heap_close(pg_class, RowExclusiveLock);
660 * now update the object id's of all the attribute tuple forms in the
661 * index relation's tuple descriptor
663 InitializeAttributeOids(indexRelation,
664 indexInfo->ii_NumIndexAttrs,
665 indexRelationId);
668 * append ATTRIBUTE tuples for the index
670 AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
672 /* ----------------
673 * update pg_index
674 * (append INDEX tuple)
676 * Note that this stows away a representation of "predicate".
677 * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
678 * ----------------
680 UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
681 classObjectId, coloptions, isprimary, !concurrent);
684 * Register constraint and dependencies for the index.
686 * If the index is from a CONSTRAINT clause, construct a pg_constraint
687 * entry. The index is then linked to the constraint, which in turn is
688 * linked to the table. If it's not a CONSTRAINT, make the dependency
689 * directly on the table.
691 * We don't need a dependency on the namespace, because there'll be an
692 * indirect dependency via our parent table.
694 * During bootstrap we can't register any dependencies, and we don't try
695 * to make a constraint either.
697 if (!IsBootstrapProcessingMode())
699 ObjectAddress myself,
700 referenced;
702 myself.classId = RelationRelationId;
703 myself.objectId = indexRelationId;
704 myself.objectSubId = 0;
706 if (isconstraint)
708 char constraintType;
709 Oid conOid;
711 if (isprimary)
712 constraintType = CONSTRAINT_PRIMARY;
713 else if (indexInfo->ii_Unique)
714 constraintType = CONSTRAINT_UNIQUE;
715 else
717 elog(ERROR, "constraint must be PRIMARY or UNIQUE");
718 constraintType = 0; /* keep compiler quiet */
721 /* Shouldn't have any expressions */
722 if (indexInfo->ii_Expressions)
723 elog(ERROR, "constraints cannot have index expressions");
725 conOid = CreateConstraintEntry(indexRelationName,
726 namespaceId,
727 constraintType,
728 false, /* isDeferrable */
729 false, /* isDeferred */
730 heapRelationId,
731 indexInfo->ii_KeyAttrNumbers,
732 indexInfo->ii_NumIndexAttrs,
733 InvalidOid, /* no domain */
734 InvalidOid, /* no foreign key */
735 NULL,
736 NULL,
737 NULL,
738 NULL,
740 ' ',
741 ' ',
742 ' ',
743 InvalidOid, /* no associated index */
744 NULL, /* no check constraint */
745 NULL,
746 NULL,
747 true, /* islocal */
748 0); /* inhcount */
750 referenced.classId = ConstraintRelationId;
751 referenced.objectId = conOid;
752 referenced.objectSubId = 0;
754 recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
756 else
758 bool have_simple_col = false;
760 /* Create auto dependencies on simply-referenced columns */
761 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
763 if (indexInfo->ii_KeyAttrNumbers[i] != 0)
765 referenced.classId = RelationRelationId;
766 referenced.objectId = heapRelationId;
767 referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
769 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
771 have_simple_col = true;
776 * It's possible for an index to not depend on any columns of the
777 * table at all, in which case we need to give it a dependency on
778 * the table as a whole; else it won't get dropped when the table
779 * is dropped. This edge case is not totally useless; for
780 * example, a unique index on a constant expression can serve to
781 * prevent a table from containing more than one row.
783 if (!have_simple_col &&
784 !contain_vars_of_level((Node *) indexInfo->ii_Expressions, 0) &&
785 !contain_vars_of_level((Node *) indexInfo->ii_Predicate, 0))
787 referenced.classId = RelationRelationId;
788 referenced.objectId = heapRelationId;
789 referenced.objectSubId = 0;
791 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
795 /* Store dependency on operator classes */
796 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
798 referenced.classId = OperatorClassRelationId;
799 referenced.objectId = classObjectId[i];
800 referenced.objectSubId = 0;
802 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
805 /* Store dependencies on anything mentioned in index expressions */
806 if (indexInfo->ii_Expressions)
808 recordDependencyOnSingleRelExpr(&myself,
809 (Node *) indexInfo->ii_Expressions,
810 heapRelationId,
811 DEPENDENCY_NORMAL,
812 DEPENDENCY_AUTO);
815 /* Store dependencies on anything mentioned in predicate */
816 if (indexInfo->ii_Predicate)
818 recordDependencyOnSingleRelExpr(&myself,
819 (Node *) indexInfo->ii_Predicate,
820 heapRelationId,
821 DEPENDENCY_NORMAL,
822 DEPENDENCY_AUTO);
827 * Advance the command counter so that we can see the newly-entered
828 * catalog tuples for the index.
830 CommandCounterIncrement();
833 * In bootstrap mode, we have to fill in the index strategy structure with
834 * information from the catalogs. If we aren't bootstrapping, then the
835 * relcache entry has already been rebuilt thanks to sinval update during
836 * CommandCounterIncrement.
838 if (IsBootstrapProcessingMode())
839 RelationInitIndexAccessInfo(indexRelation);
840 else
841 Assert(indexRelation->rd_indexcxt != NULL);
844 * If this is bootstrap (initdb) time, then we don't actually fill in the
845 * index yet. We'll be creating more indexes and classes later, so we
846 * delay filling them in until just before we're done with bootstrapping.
847 * Similarly, if the caller specified skip_build then filling the index is
848 * delayed till later (ALTER TABLE can save work in some cases with this).
849 * Otherwise, we call the AM routine that constructs the index.
851 if (IsBootstrapProcessingMode())
853 index_register(heapRelationId, indexRelationId, indexInfo);
855 else if (skip_build)
858 * Caller is responsible for filling the index later on. However,
859 * we'd better make sure that the heap relation is correctly marked as
860 * having an index.
862 index_update_stats(heapRelation,
863 true,
864 isprimary,
865 InvalidOid,
866 heapRelation->rd_rel->reltuples);
867 /* Make the above update visible */
868 CommandCounterIncrement();
870 else
872 index_build(heapRelation, indexRelation, indexInfo, isprimary);
876 * Close the heap and index; but we keep the locks that we acquired above
877 * until end of transaction.
879 index_close(indexRelation, NoLock);
880 heap_close(heapRelation, NoLock);
882 return indexRelationId;
886 * index_drop
888 * NOTE: this routine should now only be called through performDeletion(),
889 * else associated dependencies won't be cleaned up.
891 void
892 index_drop(Oid indexId)
894 Oid heapId;
895 Relation userHeapRelation;
896 Relation userIndexRelation;
897 Relation indexRelation;
898 HeapTuple tuple;
899 bool hasexprs;
900 ForkNumber forknum;
903 * To drop an index safely, we must grab exclusive lock on its parent
904 * table; otherwise there could be other backends using the index!
905 * Exclusive lock on the index alone is insufficient because another
906 * backend might be in the midst of devising a query plan that will use
907 * the index. The parser and planner take care to hold an appropriate
908 * lock on the parent table while working, but having them hold locks on
909 * all the indexes too seems overly expensive. We do grab exclusive lock
910 * on the index too, just to be safe. Both locks must be held till end of
911 * transaction, else other backends will still see this index in pg_index.
913 heapId = IndexGetRelation(indexId);
914 userHeapRelation = heap_open(heapId, AccessExclusiveLock);
916 userIndexRelation = index_open(indexId, AccessExclusiveLock);
919 * Schedule physical removal of the files
921 RelationOpenSmgr(userIndexRelation);
922 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
923 if (smgrexists(userIndexRelation->rd_smgr, forknum))
924 smgrscheduleunlink(userIndexRelation->rd_smgr, forknum,
925 userIndexRelation->rd_istemp);
926 RelationCloseSmgr(userIndexRelation);
929 * Close and flush the index's relcache entry, to ensure relcache doesn't
930 * try to rebuild it while we're deleting catalog entries. We keep the
931 * lock though.
933 index_close(userIndexRelation, NoLock);
935 RelationForgetRelation(indexId);
938 * fix INDEX relation, and check for expressional index
940 indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
942 tuple = SearchSysCache(INDEXRELID,
943 ObjectIdGetDatum(indexId),
944 0, 0, 0);
945 if (!HeapTupleIsValid(tuple))
946 elog(ERROR, "cache lookup failed for index %u", indexId);
948 hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
950 simple_heap_delete(indexRelation, &tuple->t_self);
952 ReleaseSysCache(tuple);
953 heap_close(indexRelation, RowExclusiveLock);
956 * if it has any expression columns, we might have stored statistics about
957 * them.
959 if (hasexprs)
960 RemoveStatistics(indexId, 0);
963 * fix ATTRIBUTE relation
965 DeleteAttributeTuples(indexId);
968 * fix RELATION relation
970 DeleteRelationTuple(indexId);
973 * We are presently too lazy to attempt to compute the new correct value
974 * of relhasindex (the next VACUUM will fix it if necessary). So there is
975 * no need to update the pg_class tuple for the owning relation. But we
976 * must send out a shared-cache-inval notice on the owning relation to
977 * ensure other backends update their relcache lists of indexes.
979 CacheInvalidateRelcache(userHeapRelation);
982 * Close owning rel, but keep lock
984 heap_close(userHeapRelation, NoLock);
987 /* ----------------------------------------------------------------
988 * index_build support
989 * ----------------------------------------------------------------
992 /* ----------------
993 * BuildIndexInfo
994 * Construct an IndexInfo record for an open index
996 * IndexInfo stores the information about the index that's needed by
997 * FormIndexDatum, which is used for both index_build() and later insertion
998 * of individual index tuples. Normally we build an IndexInfo for an index
999 * just once per command, and then use it for (potentially) many tuples.
1000 * ----------------
1002 IndexInfo *
1003 BuildIndexInfo(Relation index)
1005 IndexInfo *ii = makeNode(IndexInfo);
1006 Form_pg_index indexStruct = index->rd_index;
1007 int i;
1008 int numKeys;
1010 /* check the number of keys, and copy attr numbers into the IndexInfo */
1011 numKeys = indexStruct->indnatts;
1012 if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1013 elog(ERROR, "invalid indnatts %d for index %u",
1014 numKeys, RelationGetRelid(index));
1015 ii->ii_NumIndexAttrs = numKeys;
1016 for (i = 0; i < numKeys; i++)
1017 ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1019 /* fetch any expressions needed for expressional indexes */
1020 ii->ii_Expressions = RelationGetIndexExpressions(index);
1021 ii->ii_ExpressionsState = NIL;
1023 /* fetch index predicate if any */
1024 ii->ii_Predicate = RelationGetIndexPredicate(index);
1025 ii->ii_PredicateState = NIL;
1027 /* other info */
1028 ii->ii_Unique = indexStruct->indisunique;
1029 ii->ii_ReadyForInserts = indexStruct->indisready;
1031 /* initialize index-build state to default */
1032 ii->ii_Concurrent = false;
1033 ii->ii_BrokenHotChain = false;
1035 return ii;
1038 /* ----------------
1039 * FormIndexDatum
1040 * Construct values[] and isnull[] arrays for a new index tuple.
1042 * indexInfo Info about the index
1043 * slot Heap tuple for which we must prepare an index entry
1044 * estate executor state for evaluating any index expressions
1045 * values Array of index Datums (output area)
1046 * isnull Array of is-null indicators (output area)
1048 * When there are no index expressions, estate may be NULL. Otherwise it
1049 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
1050 * context must point to the heap tuple passed in.
1052 * Notice we don't actually call index_form_tuple() here; we just prepare
1053 * its input arrays values[] and isnull[]. This is because the index AM
1054 * may wish to alter the data before storage.
1055 * ----------------
1057 void
1058 FormIndexDatum(IndexInfo *indexInfo,
1059 TupleTableSlot *slot,
1060 EState *estate,
1061 Datum *values,
1062 bool *isnull)
1064 ListCell *indexpr_item;
1065 int i;
1067 if (indexInfo->ii_Expressions != NIL &&
1068 indexInfo->ii_ExpressionsState == NIL)
1070 /* First time through, set up expression evaluation state */
1071 indexInfo->ii_ExpressionsState = (List *)
1072 ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
1073 estate);
1074 /* Check caller has set up context correctly */
1075 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1077 indexpr_item = list_head(indexInfo->ii_ExpressionsState);
1079 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1081 int keycol = indexInfo->ii_KeyAttrNumbers[i];
1082 Datum iDatum;
1083 bool isNull;
1085 if (keycol != 0)
1088 * Plain index column; get the value we need directly from the
1089 * heap tuple.
1091 iDatum = slot_getattr(slot, keycol, &isNull);
1093 else
1096 * Index expression --- need to evaluate it.
1098 if (indexpr_item == NULL)
1099 elog(ERROR, "wrong number of index expressions");
1100 iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1101 GetPerTupleExprContext(estate),
1102 &isNull,
1103 NULL);
1104 indexpr_item = lnext(indexpr_item);
1106 values[i] = iDatum;
1107 isnull[i] = isNull;
1110 if (indexpr_item != NULL)
1111 elog(ERROR, "wrong number of index expressions");
1116 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
1118 * This routine updates the pg_class row of either an index or its parent
1119 * relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed
1120 * to ensure we can do all the necessary work in just one update.
1122 * hasindex: set relhasindex to this value
1123 * isprimary: if true, set relhaspkey true; else no change
1124 * reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
1125 * else no change
1126 * reltuples: set reltuples to this value
1128 * relpages is also updated (using RelationGetNumberOfBlocks()).
1130 * NOTE: an important side-effect of this operation is that an SI invalidation
1131 * message is sent out to all backends --- including me --- causing relcache
1132 * entries to be flushed or updated with the new data. This must happen even
1133 * if we find that no change is needed in the pg_class row. When updating
1134 * a heap entry, this ensures that other backends find out about the new
1135 * index. When updating an index, it's important because some index AMs
1136 * expect a relcache flush to occur after REINDEX.
1138 static void
1139 index_update_stats(Relation rel, bool hasindex, bool isprimary,
1140 Oid reltoastidxid, double reltuples)
1142 BlockNumber relpages = RelationGetNumberOfBlocks(rel);
1143 Oid relid = RelationGetRelid(rel);
1144 Relation pg_class;
1145 HeapTuple tuple;
1146 Form_pg_class rd_rel;
1147 bool dirty;
1150 * We always update the pg_class row using a non-transactional,
1151 * overwrite-in-place update. There are several reasons for this:
1153 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
1155 * 2. We could be reindexing pg_class itself, in which case we can't move
1156 * its pg_class row because CatalogUpdateIndexes might not know about all
1157 * the indexes yet (see reindex_relation).
1159 * 3. Because we execute CREATE INDEX with just share lock on the parent
1160 * rel (to allow concurrent index creations), an ordinary update could
1161 * suffer a tuple-concurrently-updated failure against another CREATE
1162 * INDEX committing at about the same time. We can avoid that by having
1163 * them both do nontransactional updates (we assume they will both be
1164 * trying to change the pg_class row to the same thing, so it doesn't
1165 * matter which goes first).
1167 * 4. Even with just a single CREATE INDEX, there's a risk factor because
1168 * someone else might be trying to open the rel while we commit, and this
1169 * creates a race condition as to whether he will see both or neither of
1170 * the pg_class row versions as valid. Again, a non-transactional update
1171 * avoids the risk. It is indeterminate which state of the row the other
1172 * process will see, but it doesn't matter (if he's only taking
1173 * AccessShareLock, then it's not critical that he see relhasindex true).
1175 * It is safe to use a non-transactional update even though our
1176 * transaction could still fail before committing. Setting relhasindex
1177 * true is safe even if there are no indexes (VACUUM will eventually fix
1178 * it), and of course the relpages and reltuples counts are correct (or at
1179 * least more so than the old values) regardless.
1182 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1185 * Make a copy of the tuple to update. Normally we use the syscache, but
1186 * we can't rely on that during bootstrap or while reindexing pg_class
1187 * itself.
1189 if (IsBootstrapProcessingMode() ||
1190 ReindexIsProcessingHeap(RelationRelationId))
1192 /* don't assume syscache will work */
1193 HeapScanDesc pg_class_scan;
1194 ScanKeyData key[1];
1196 ScanKeyInit(&key[0],
1197 ObjectIdAttributeNumber,
1198 BTEqualStrategyNumber, F_OIDEQ,
1199 ObjectIdGetDatum(relid));
1201 pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
1202 tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
1203 tuple = heap_copytuple(tuple);
1204 heap_endscan(pg_class_scan);
1206 else
1208 /* normal case, use syscache */
1209 tuple = SearchSysCacheCopy(RELOID,
1210 ObjectIdGetDatum(relid),
1211 0, 0, 0);
1214 if (!HeapTupleIsValid(tuple))
1215 elog(ERROR, "could not find tuple for relation %u", relid);
1216 rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1218 /* Apply required updates, if any, to copied tuple */
1220 dirty = false;
1221 if (rd_rel->relhasindex != hasindex)
1223 rd_rel->relhasindex = hasindex;
1224 dirty = true;
1226 if (isprimary)
1228 if (!rd_rel->relhaspkey)
1230 rd_rel->relhaspkey = true;
1231 dirty = true;
1234 if (OidIsValid(reltoastidxid))
1236 Assert(rd_rel->relkind == RELKIND_TOASTVALUE);
1237 if (rd_rel->reltoastidxid != reltoastidxid)
1239 rd_rel->reltoastidxid = reltoastidxid;
1240 dirty = true;
1243 if (rd_rel->reltuples != (float4) reltuples)
1245 rd_rel->reltuples = (float4) reltuples;
1246 dirty = true;
1248 if (rd_rel->relpages != (int32) relpages)
1250 rd_rel->relpages = (int32) relpages;
1251 dirty = true;
1255 * If anything changed, write out the tuple
1257 if (dirty)
1259 heap_inplace_update(pg_class, tuple);
1260 /* the above sends a cache inval message */
1262 else
1264 /* no need to change tuple, but force relcache inval anyway */
1265 CacheInvalidateRelcacheByTuple(tuple);
1268 heap_freetuple(tuple);
1270 heap_close(pg_class, RowExclusiveLock);
1274 * setNewRelfilenode - assign a new relfilenode value to the relation
1276 * Caller must already hold exclusive lock on the relation.
1278 * The relation is marked with relfrozenxid=freezeXid (InvalidTransactionId
1279 * must be passed for indexes)
1281 void
1282 setNewRelfilenode(Relation relation, TransactionId freezeXid)
1284 Oid newrelfilenode;
1285 RelFileNode newrnode;
1286 SMgrRelation srel;
1287 Relation pg_class;
1288 HeapTuple tuple;
1289 Form_pg_class rd_rel;
1290 ForkNumber i;
1292 /* Can't change relfilenode for nailed tables (indexes ok though) */
1293 Assert(!relation->rd_isnailed ||
1294 relation->rd_rel->relkind == RELKIND_INDEX);
1295 /* Can't change for shared tables or indexes */
1296 Assert(!relation->rd_rel->relisshared);
1297 /* Indexes must have Invalid frozenxid; other relations must not */
1298 Assert((relation->rd_rel->relkind == RELKIND_INDEX &&
1299 freezeXid == InvalidTransactionId) ||
1300 TransactionIdIsNormal(freezeXid));
1302 /* Allocate a new relfilenode */
1303 newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace,
1304 relation->rd_rel->relisshared,
1305 NULL);
1308 * Find the pg_class tuple for the given relation. This is not used
1309 * during bootstrap, so okay to use heap_update always.
1311 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1313 tuple = SearchSysCacheCopy(RELOID,
1314 ObjectIdGetDatum(RelationGetRelid(relation)),
1315 0, 0, 0);
1316 if (!HeapTupleIsValid(tuple))
1317 elog(ERROR, "could not find tuple for relation %u",
1318 RelationGetRelid(relation));
1319 rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1321 RelationOpenSmgr(relation);
1324 * ... and create storage for corresponding forks in the new relfilenode.
1326 * NOTE: any conflict in relfilenode value will be caught here
1328 newrnode = relation->rd_node;
1329 newrnode.relNode = newrelfilenode;
1330 srel = smgropen(newrnode);
1332 /* Create the main fork, like heap_create() does */
1333 smgrcreate(srel, MAIN_FORKNUM, relation->rd_istemp, false);
1336 * For a heap, create FSM fork as well. Indexams are responsible for
1337 * creating any extra forks themselves.
1339 if (relation->rd_rel->relkind == RELKIND_RELATION ||
1340 relation->rd_rel->relkind == RELKIND_TOASTVALUE)
1341 smgrcreate(srel, FSM_FORKNUM, relation->rd_istemp, false);
1343 /* schedule unlinking old files */
1344 for (i = 0; i <= MAX_FORKNUM; i++)
1346 if (smgrexists(relation->rd_smgr, i))
1347 smgrscheduleunlink(relation->rd_smgr, i, relation->rd_istemp);
1350 smgrclose(srel);
1351 RelationCloseSmgr(relation);
1353 /* update the pg_class row */
1354 rd_rel->relfilenode = newrelfilenode;
1355 rd_rel->relpages = 0; /* it's empty until further notice */
1356 rd_rel->reltuples = 0;
1357 rd_rel->relfrozenxid = freezeXid;
1358 simple_heap_update(pg_class, &tuple->t_self, tuple);
1359 CatalogUpdateIndexes(pg_class, tuple);
1361 heap_freetuple(tuple);
1363 heap_close(pg_class, RowExclusiveLock);
1365 /* Make sure the relfilenode change is visible */
1366 CommandCounterIncrement();
1368 /* Mark the rel as having a new relfilenode in current transaction */
1369 RelationCacheMarkNewRelfilenode(relation);
1374 * index_build - invoke access-method-specific index build procedure
1376 * On entry, the index's catalog entries are valid, and its physical disk
1377 * file has been created but is empty. We call the AM-specific build
1378 * procedure to fill in the index contents. We then update the pg_class
1379 * entries of the index and heap relation as needed, using statistics
1380 * returned by ambuild as well as data passed by the caller.
1382 * Note: when reindexing an existing index, isprimary can be false;
1383 * the index is already properly marked and need not be re-marked.
1385 * Note: before Postgres 8.2, the passed-in heap and index Relations
1386 * were automatically closed by this routine. This is no longer the case.
1387 * The caller opened 'em, and the caller should close 'em.
1389 void
1390 index_build(Relation heapRelation,
1391 Relation indexRelation,
1392 IndexInfo *indexInfo,
1393 bool isprimary)
1395 RegProcedure procedure;
1396 IndexBuildResult *stats;
1397 Oid save_userid;
1398 bool save_secdefcxt;
1401 * sanity checks
1403 Assert(RelationIsValid(indexRelation));
1404 Assert(PointerIsValid(indexRelation->rd_am));
1406 procedure = indexRelation->rd_am->ambuild;
1407 Assert(RegProcedureIsValid(procedure));
1410 * Switch to the table owner's userid, so that any index functions are
1411 * run as that user.
1413 GetUserIdAndContext(&save_userid, &save_secdefcxt);
1414 SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
1417 * Call the access method's build procedure
1419 stats = (IndexBuildResult *)
1420 DatumGetPointer(OidFunctionCall3(procedure,
1421 PointerGetDatum(heapRelation),
1422 PointerGetDatum(indexRelation),
1423 PointerGetDatum(indexInfo)));
1424 Assert(PointerIsValid(stats));
1426 /* Restore userid */
1427 SetUserIdAndContext(save_userid, save_secdefcxt);
1430 * If we found any potentially broken HOT chains, mark the index as not
1431 * being usable until the current transaction is below the event horizon.
1432 * See src/backend/access/heap/README.HOT for discussion.
1434 if (indexInfo->ii_BrokenHotChain)
1436 Oid indexId = RelationGetRelid(indexRelation);
1437 Relation pg_index;
1438 HeapTuple indexTuple;
1439 Form_pg_index indexForm;
1441 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
1443 indexTuple = SearchSysCacheCopy(INDEXRELID,
1444 ObjectIdGetDatum(indexId),
1445 0, 0, 0);
1446 if (!HeapTupleIsValid(indexTuple))
1447 elog(ERROR, "cache lookup failed for index %u", indexId);
1448 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
1450 indexForm->indcheckxmin = true;
1451 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
1452 CatalogUpdateIndexes(pg_index, indexTuple);
1454 heap_freetuple(indexTuple);
1455 heap_close(pg_index, RowExclusiveLock);
1459 * Update heap and index pg_class rows
1461 index_update_stats(heapRelation,
1462 true,
1463 isprimary,
1464 (heapRelation->rd_rel->relkind == RELKIND_TOASTVALUE) ?
1465 RelationGetRelid(indexRelation) : InvalidOid,
1466 stats->heap_tuples);
1468 index_update_stats(indexRelation,
1469 false,
1470 false,
1471 InvalidOid,
1472 stats->index_tuples);
1474 /* Make the updated versions visible */
1475 CommandCounterIncrement();
1480 * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
1482 * This is called back from an access-method-specific index build procedure
1483 * after the AM has done whatever setup it needs. The parent heap relation
1484 * is scanned to find tuples that should be entered into the index. Each
1485 * such tuple is passed to the AM's callback routine, which does the right
1486 * things to add it to the new index. After we return, the AM's index
1487 * build procedure does whatever cleanup is needed; in particular, it should
1488 * close the heap and index relations.
1490 * The total count of heap tuples is returned. This is for updating pg_class
1491 * statistics. (It's annoying not to be able to do that here, but we can't
1492 * do it until after the relation is closed.) Note that the index AM itself
1493 * must keep track of the number of index tuples; we don't do so here because
1494 * the AM might reject some of the tuples for its own reasons, such as being
1495 * unable to store NULLs.
1497 * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
1498 * any potentially broken HOT chains. Currently, we set this if there are
1499 * any RECENTLY_DEAD entries in a HOT chain, without trying very hard to
1500 * detect whether they're really incompatible with the chain tip.
1502 double
1503 IndexBuildHeapScan(Relation heapRelation,
1504 Relation indexRelation,
1505 IndexInfo *indexInfo,
1506 bool allow_sync,
1507 IndexBuildCallback callback,
1508 void *callback_state)
1510 HeapScanDesc scan;
1511 HeapTuple heapTuple;
1512 Datum values[INDEX_MAX_KEYS];
1513 bool isnull[INDEX_MAX_KEYS];
1514 double reltuples;
1515 List *predicate;
1516 TupleTableSlot *slot;
1517 EState *estate;
1518 ExprContext *econtext;
1519 Snapshot snapshot;
1520 TransactionId OldestXmin;
1521 BlockNumber root_blkno = InvalidBlockNumber;
1522 OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1525 * sanity checks
1527 Assert(OidIsValid(indexRelation->rd_rel->relam));
1530 * Need an EState for evaluation of index expressions and partial-index
1531 * predicates. Also a slot to hold the current tuple.
1533 estate = CreateExecutorState();
1534 econtext = GetPerTupleExprContext(estate);
1535 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
1537 /* Arrange for econtext's scan tuple to be the tuple under test */
1538 econtext->ecxt_scantuple = slot;
1540 /* Set up execution state for predicate, if any. */
1541 predicate = (List *)
1542 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
1543 estate);
1546 * Prepare for scan of the base relation. In a normal index build, we use
1547 * SnapshotAny because we must retrieve all tuples and do our own time
1548 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1549 * concurrent build, we take a regular MVCC snapshot and index whatever's
1550 * live according to that. During bootstrap we just use SnapshotNow.
1552 if (IsBootstrapProcessingMode())
1554 snapshot = SnapshotNow;
1555 OldestXmin = InvalidTransactionId; /* not used */
1557 else if (indexInfo->ii_Concurrent)
1559 snapshot = RegisterSnapshot(GetTransactionSnapshot());
1560 OldestXmin = InvalidTransactionId; /* not used */
1562 else
1564 snapshot = SnapshotAny;
1565 /* okay to ignore lazy VACUUMs here */
1566 OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
1569 scan = heap_beginscan_strat(heapRelation, /* relation */
1570 snapshot, /* snapshot */
1571 0, /* number of keys */
1572 NULL, /* scan key */
1573 true, /* buffer access strategy OK */
1574 allow_sync); /* syncscan OK? */
1576 reltuples = 0;
1579 * Scan all tuples in the base relation.
1581 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1583 bool tupleIsAlive;
1585 CHECK_FOR_INTERRUPTS();
1588 * When dealing with a HOT-chain of updated tuples, we want to index
1589 * the values of the live tuple (if any), but index it under the TID
1590 * of the chain's root tuple. This approach is necessary to preserve
1591 * the HOT-chain structure in the heap. So we need to be able to find
1592 * the root item offset for every tuple that's in a HOT-chain. When
1593 * first reaching a new page of the relation, call
1594 * heap_get_root_tuples() to build a map of root item offsets on the
1595 * page.
1597 * It might look unsafe to use this information across buffer
1598 * lock/unlock. However, we hold ShareLock on the table so no
1599 * ordinary insert/update/delete should occur; and we hold pin on the
1600 * buffer continuously while visiting the page, so no pruning
1601 * operation can occur either.
1603 * Note the implied assumption that there is no more than one live
1604 * tuple per HOT-chain ...
1606 if (scan->rs_cblock != root_blkno)
1608 Page page = BufferGetPage(scan->rs_cbuf);
1610 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1611 heap_get_root_tuples(page, root_offsets);
1612 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1614 root_blkno = scan->rs_cblock;
1617 if (snapshot == SnapshotAny)
1619 /* do our own time qual check */
1620 bool indexIt;
1622 recheck:
1625 * We could possibly get away with not locking the buffer here,
1626 * since caller should hold ShareLock on the relation, but let's
1627 * be conservative about it. (This remark is still correct even
1628 * with HOT-pruning: our pin on the buffer prevents pruning.)
1630 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
1632 switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin,
1633 scan->rs_cbuf))
1635 case HEAPTUPLE_DEAD:
1636 /* Definitely dead, we can ignore it */
1637 indexIt = false;
1638 tupleIsAlive = false;
1639 break;
1640 case HEAPTUPLE_LIVE:
1641 /* Normal case, index and unique-check it */
1642 indexIt = true;
1643 tupleIsAlive = true;
1644 break;
1645 case HEAPTUPLE_RECENTLY_DEAD:
1648 * If tuple is recently deleted then we must index it
1649 * anyway to preserve MVCC semantics. (Pre-existing
1650 * transactions could try to use the index after we finish
1651 * building it, and may need to see such tuples.)
1653 * However, if it was HOT-updated then we must only index
1654 * the live tuple at the end of the HOT-chain. Since this
1655 * breaks semantics for pre-existing snapshots, mark the
1656 * index as unusable for them.
1658 * If we've already decided that the index will be unsafe
1659 * for old snapshots, we may as well stop indexing
1660 * recently-dead tuples, since there's no longer any
1661 * point.
1663 if (HeapTupleIsHotUpdated(heapTuple))
1665 indexIt = false;
1666 /* mark the index as unsafe for old snapshots */
1667 indexInfo->ii_BrokenHotChain = true;
1669 else if (indexInfo->ii_BrokenHotChain)
1670 indexIt = false;
1671 else
1672 indexIt = true;
1673 /* In any case, exclude the tuple from unique-checking */
1674 tupleIsAlive = false;
1675 break;
1676 case HEAPTUPLE_INSERT_IN_PROGRESS:
1679 * Since caller should hold ShareLock or better, we should
1680 * not see any tuples inserted by open transactions ---
1681 * unless it's our own transaction. (Consider INSERT
1682 * followed by CREATE INDEX within a transaction.) An
1683 * exception occurs when reindexing a system catalog,
1684 * because we often release lock on system catalogs before
1685 * committing. In that case we wait for the inserting
1686 * transaction to finish and check again. (We could do
1687 * that on user tables too, but since the case is not
1688 * expected it seems better to throw an error.)
1690 if (!TransactionIdIsCurrentTransactionId(
1691 HeapTupleHeaderGetXmin(heapTuple->t_data)))
1693 if (!IsSystemRelation(heapRelation))
1694 elog(ERROR, "concurrent insert in progress");
1695 else
1698 * Must drop the lock on the buffer before we wait
1700 TransactionId xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1702 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1703 XactLockTableWait(xwait);
1704 goto recheck;
1709 * We must index such tuples, since if the index build
1710 * commits then they're good.
1712 indexIt = true;
1713 tupleIsAlive = true;
1714 break;
1715 case HEAPTUPLE_DELETE_IN_PROGRESS:
1718 * Since caller should hold ShareLock or better, we should
1719 * not see any tuples deleted by open transactions ---
1720 * unless it's our own transaction. (Consider DELETE
1721 * followed by CREATE INDEX within a transaction.) An
1722 * exception occurs when reindexing a system catalog,
1723 * because we often release lock on system catalogs before
1724 * committing. In that case we wait for the deleting
1725 * transaction to finish and check again. (We could do
1726 * that on user tables too, but since the case is not
1727 * expected it seems better to throw an error.)
1729 Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
1730 if (!TransactionIdIsCurrentTransactionId(
1731 HeapTupleHeaderGetXmax(heapTuple->t_data)))
1733 if (!IsSystemRelation(heapRelation))
1734 elog(ERROR, "concurrent delete in progress");
1735 else
1738 * Must drop the lock on the buffer before we wait
1740 TransactionId xwait = HeapTupleHeaderGetXmax(heapTuple->t_data);
1742 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1743 XactLockTableWait(xwait);
1744 goto recheck;
1749 * Otherwise, we have to treat these tuples just like
1750 * RECENTLY_DELETED ones.
1752 if (HeapTupleIsHotUpdated(heapTuple))
1754 indexIt = false;
1755 /* mark the index as unsafe for old snapshots */
1756 indexInfo->ii_BrokenHotChain = true;
1758 else if (indexInfo->ii_BrokenHotChain)
1759 indexIt = false;
1760 else
1761 indexIt = true;
1762 /* In any case, exclude the tuple from unique-checking */
1763 tupleIsAlive = false;
1764 break;
1765 default:
1766 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1767 indexIt = tupleIsAlive = false; /* keep compiler quiet */
1768 break;
1771 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1773 if (!indexIt)
1774 continue;
1776 else
1778 /* heap_getnext did the time qual check */
1779 tupleIsAlive = true;
1782 reltuples += 1;
1784 MemoryContextReset(econtext->ecxt_per_tuple_memory);
1786 /* Set up for predicate or expression evaluation */
1787 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
1790 * In a partial index, discard tuples that don't satisfy the
1791 * predicate.
1793 if (predicate != NIL)
1795 if (!ExecQual(predicate, econtext, false))
1796 continue;
1800 * For the current heap tuple, extract all the attributes we use in
1801 * this index, and note which are null. This also performs evaluation
1802 * of any expressions needed.
1804 FormIndexDatum(indexInfo,
1805 slot,
1806 estate,
1807 values,
1808 isnull);
1811 * You'd think we should go ahead and build the index tuple here, but
1812 * some index AMs want to do further processing on the data first. So
1813 * pass the values[] and isnull[] arrays, instead.
1816 if (HeapTupleIsHeapOnly(heapTuple))
1819 * For a heap-only tuple, pretend its TID is that of the root. See
1820 * src/backend/access/heap/README.HOT for discussion.
1822 HeapTupleData rootTuple;
1823 OffsetNumber offnum;
1825 rootTuple = *heapTuple;
1826 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1828 Assert(OffsetNumberIsValid(root_offsets[offnum - 1]));
1830 ItemPointerSetOffsetNumber(&rootTuple.t_self,
1831 root_offsets[offnum - 1]);
1833 /* Call the AM's callback routine to process the tuple */
1834 callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
1835 callback_state);
1837 else
1839 /* Call the AM's callback routine to process the tuple */
1840 callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
1841 callback_state);
1845 heap_endscan(scan);
1847 /* we can now forget our snapshot, if set */
1848 if (indexInfo->ii_Concurrent)
1849 UnregisterSnapshot(snapshot);
1851 ExecDropSingleTupleTableSlot(slot);
1853 FreeExecutorState(estate);
1855 /* These may have been pointing to the now-gone estate */
1856 indexInfo->ii_ExpressionsState = NIL;
1857 indexInfo->ii_PredicateState = NIL;
1859 return reltuples;
1864 * validate_index - support code for concurrent index builds
1866 * We do a concurrent index build by first inserting the catalog entry for the
1867 * index via index_create(), marking it not indisready and not indisvalid.
1868 * Then we commit our transaction and start a new one, then we wait for all
1869 * transactions that could have been modifying the table to terminate. Now
1870 * we know that any subsequently-started transactions will see the index and
1871 * honor its constraints on HOT updates; so while existing HOT-chains might
1872 * be broken with respect to the index, no currently live tuple will have an
1873 * incompatible HOT update done to it. We now build the index normally via
1874 * index_build(), while holding a weak lock that allows concurrent
1875 * insert/update/delete. Also, we index only tuples that are valid
1876 * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
1877 * build takes care to include recently-dead tuples. This is OK because
1878 * we won't mark the index valid until all transactions that might be able
1879 * to see those tuples are gone. The reason for doing that is to avoid
1880 * bogus unique-index failures due to concurrent UPDATEs (we might see
1881 * different versions of the same row as being valid when we pass over them,
1882 * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
1883 * does not contain any tuples added to the table while we built the index.
1885 * Next, we mark the index "indisready" (but still not "indisvalid") and
1886 * commit the second transaction and start a third. Again we wait for all
1887 * transactions that could have been modifying the table to terminate. Now
1888 * we know that any subsequently-started transactions will see the index and
1889 * insert their new tuples into it. We then take a new reference snapshot
1890 * which is passed to validate_index(). Any tuples that are valid according
1891 * to this snap, but are not in the index, must be added to the index.
1892 * (Any tuples committed live after the snap will be inserted into the
1893 * index by their originating transaction. Any tuples committed dead before
1894 * the snap need not be indexed, because we will wait out all transactions
1895 * that might care about them before we mark the index valid.)
1897 * validate_index() works by first gathering all the TIDs currently in the
1898 * index, using a bulkdelete callback that just stores the TIDs and doesn't
1899 * ever say "delete it". (This should be faster than a plain indexscan;
1900 * also, not all index AMs support full-index indexscan.) Then we sort the
1901 * TIDs, and finally scan the table doing a "merge join" against the TID list
1902 * to see which tuples are missing from the index. Thus we will ensure that
1903 * all tuples valid according to the reference snapshot are in the index.
1905 * Building a unique index this way is tricky: we might try to insert a
1906 * tuple that is already dead or is in process of being deleted, and we
1907 * mustn't have a uniqueness failure against an updated version of the same
1908 * row. We could try to check the tuple to see if it's already dead and tell
1909 * index_insert() not to do the uniqueness check, but that still leaves us
1910 * with a race condition against an in-progress update. To handle that,
1911 * we expect the index AM to recheck liveness of the to-be-inserted tuple
1912 * before it declares a uniqueness error.
1914 * After completing validate_index(), we wait until all transactions that
1915 * were alive at the time of the reference snapshot are gone; this is
1916 * necessary to be sure there are none left with a serializable snapshot
1917 * older than the reference (and hence possibly able to see tuples we did
1918 * not index). Then we mark the index "indisvalid" and commit. Subsequent
1919 * transactions will be able to use it for queries.
1921 * Doing two full table scans is a brute-force strategy. We could try to be
1922 * cleverer, eg storing new tuples in a special area of the table (perhaps
1923 * making the table append-only by setting use_fsm). However that would
1924 * add yet more locking issues.
1926 void
1927 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
1929 Relation heapRelation,
1930 indexRelation;
1931 IndexInfo *indexInfo;
1932 IndexVacuumInfo ivinfo;
1933 v_i_state state;
1934 Oid save_userid;
1935 bool save_secdefcxt;
1937 /* Open and lock the parent heap relation */
1938 heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1939 /* And the target index relation */
1940 indexRelation = index_open(indexId, RowExclusiveLock);
1943 * Fetch info needed for index_insert. (You might think this should be
1944 * passed in from DefineIndex, but its copy is long gone due to having
1945 * been built in a previous transaction.)
1947 indexInfo = BuildIndexInfo(indexRelation);
1949 /* mark build is concurrent just for consistency */
1950 indexInfo->ii_Concurrent = true;
1953 * Switch to the table owner's userid, so that any index functions are
1954 * run as that user.
1956 GetUserIdAndContext(&save_userid, &save_secdefcxt);
1957 SetUserIdAndContext(heapRelation->rd_rel->relowner, true);
1960 * Scan the index and gather up all the TIDs into a tuplesort object.
1962 ivinfo.index = indexRelation;
1963 ivinfo.vacuum_full = false;
1964 ivinfo.message_level = DEBUG2;
1965 ivinfo.num_heap_tuples = -1;
1966 ivinfo.strategy = NULL;
1968 state.tuplesort = tuplesort_begin_datum(TIDOID,
1969 TIDLessOperator, false,
1970 maintenance_work_mem,
1971 false);
1972 state.htups = state.itups = state.tups_inserted = 0;
1974 (void) index_bulk_delete(&ivinfo, NULL,
1975 validate_index_callback, (void *) &state);
1977 /* Execute the sort */
1978 tuplesort_performsort(state.tuplesort);
1981 * Now scan the heap and "merge" it with the index
1983 validate_index_heapscan(heapRelation,
1984 indexRelation,
1985 indexInfo,
1986 snapshot,
1987 &state);
1989 /* Done with tuplesort object */
1990 tuplesort_end(state.tuplesort);
1992 elog(DEBUG2,
1993 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
1994 state.htups, state.itups, state.tups_inserted);
1996 /* Restore userid */
1997 SetUserIdAndContext(save_userid, save_secdefcxt);
1999 /* Close rels, but keep locks */
2000 index_close(indexRelation, NoLock);
2001 heap_close(heapRelation, NoLock);
2005 * validate_index_callback - bulkdelete callback to collect the index TIDs
2007 static bool
2008 validate_index_callback(ItemPointer itemptr, void *opaque)
2010 v_i_state *state = (v_i_state *) opaque;
2012 tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
2013 state->itups += 1;
2014 return false; /* never actually delete anything */
2018 * validate_index_heapscan - second table scan for concurrent index build
2020 * This has much code in common with IndexBuildHeapScan, but it's enough
2021 * different that it seems cleaner to have two routines not one.
2023 static void
2024 validate_index_heapscan(Relation heapRelation,
2025 Relation indexRelation,
2026 IndexInfo *indexInfo,
2027 Snapshot snapshot,
2028 v_i_state *state)
2030 HeapScanDesc scan;
2031 HeapTuple heapTuple;
2032 Datum values[INDEX_MAX_KEYS];
2033 bool isnull[INDEX_MAX_KEYS];
2034 List *predicate;
2035 TupleTableSlot *slot;
2036 EState *estate;
2037 ExprContext *econtext;
2038 BlockNumber root_blkno = InvalidBlockNumber;
2039 OffsetNumber root_offsets[MaxHeapTuplesPerPage];
2040 bool in_index[MaxHeapTuplesPerPage];
2042 /* state variables for the merge */
2043 ItemPointer indexcursor = NULL;
2044 bool tuplesort_empty = false;
2047 * sanity checks
2049 Assert(OidIsValid(indexRelation->rd_rel->relam));
2052 * Need an EState for evaluation of index expressions and partial-index
2053 * predicates. Also a slot to hold the current tuple.
2055 estate = CreateExecutorState();
2056 econtext = GetPerTupleExprContext(estate);
2057 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2059 /* Arrange for econtext's scan tuple to be the tuple under test */
2060 econtext->ecxt_scantuple = slot;
2062 /* Set up execution state for predicate, if any. */
2063 predicate = (List *)
2064 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2065 estate);
2068 * Prepare for scan of the base relation. We need just those tuples
2069 * satisfying the passed-in reference snapshot. We must disable syncscan
2070 * here, because it's critical that we read from block zero forward to
2071 * match the sorted TIDs.
2073 scan = heap_beginscan_strat(heapRelation, /* relation */
2074 snapshot, /* snapshot */
2075 0, /* number of keys */
2076 NULL, /* scan key */
2077 true, /* buffer access strategy OK */
2078 false); /* syncscan not OK */
2081 * Scan all tuples matching the snapshot.
2083 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2085 ItemPointer heapcursor = &heapTuple->t_self;
2086 ItemPointerData rootTuple;
2087 OffsetNumber root_offnum;
2089 CHECK_FOR_INTERRUPTS();
2091 state->htups += 1;
2094 * As commented in IndexBuildHeapScan, we should index heap-only
2095 * tuples under the TIDs of their root tuples; so when we advance onto
2096 * a new heap page, build a map of root item offsets on the page.
2098 * This complicates merging against the tuplesort output: we will
2099 * visit the live tuples in order by their offsets, but the root
2100 * offsets that we need to compare against the index contents might be
2101 * ordered differently. So we might have to "look back" within the
2102 * tuplesort output, but only within the current page. We handle that
2103 * by keeping a bool array in_index[] showing all the
2104 * already-passed-over tuplesort output TIDs of the current page. We
2105 * clear that array here, when advancing onto a new heap page.
2107 if (scan->rs_cblock != root_blkno)
2109 Page page = BufferGetPage(scan->rs_cbuf);
2111 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2112 heap_get_root_tuples(page, root_offsets);
2113 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2115 memset(in_index, 0, sizeof(in_index));
2117 root_blkno = scan->rs_cblock;
2120 /* Convert actual tuple TID to root TID */
2121 rootTuple = *heapcursor;
2122 root_offnum = ItemPointerGetOffsetNumber(heapcursor);
2124 if (HeapTupleIsHeapOnly(heapTuple))
2126 root_offnum = root_offsets[root_offnum - 1];
2127 Assert(OffsetNumberIsValid(root_offnum));
2128 ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
2132 * "merge" by skipping through the index tuples until we find or pass
2133 * the current root tuple.
2135 while (!tuplesort_empty &&
2136 (!indexcursor ||
2137 ItemPointerCompare(indexcursor, &rootTuple) < 0))
2139 Datum ts_val;
2140 bool ts_isnull;
2142 if (indexcursor)
2145 * Remember index items seen earlier on the current heap page
2147 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
2148 in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
2149 pfree(indexcursor);
2152 tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
2153 &ts_val, &ts_isnull);
2154 Assert(tuplesort_empty || !ts_isnull);
2155 indexcursor = (ItemPointer) DatumGetPointer(ts_val);
2159 * If the tuplesort has overshot *and* we didn't see a match earlier,
2160 * then this tuple is missing from the index, so insert it.
2162 if ((tuplesort_empty ||
2163 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
2164 !in_index[root_offnum - 1])
2166 MemoryContextReset(econtext->ecxt_per_tuple_memory);
2168 /* Set up for predicate or expression evaluation */
2169 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2172 * In a partial index, discard tuples that don't satisfy the
2173 * predicate.
2175 if (predicate != NIL)
2177 if (!ExecQual(predicate, econtext, false))
2178 continue;
2182 * For the current heap tuple, extract all the attributes we use
2183 * in this index, and note which are null. This also performs
2184 * evaluation of any expressions needed.
2186 FormIndexDatum(indexInfo,
2187 slot,
2188 estate,
2189 values,
2190 isnull);
2193 * You'd think we should go ahead and build the index tuple here,
2194 * but some index AMs want to do further processing on the data
2195 * first. So pass the values[] and isnull[] arrays, instead.
2199 * If the tuple is already committed dead, you might think we
2200 * could suppress uniqueness checking, but this is no longer true
2201 * in the presence of HOT, because the insert is actually a proxy
2202 * for a uniqueness check on the whole HOT-chain. That is, the
2203 * tuple we have here could be dead because it was already
2204 * HOT-updated, and if so the updating transaction will not have
2205 * thought it should insert index entries. The index AM will
2206 * check the whole HOT-chain and correctly detect a conflict if
2207 * there is one.
2210 index_insert(indexRelation,
2211 values,
2212 isnull,
2213 &rootTuple,
2214 heapRelation,
2215 indexInfo->ii_Unique);
2217 state->tups_inserted += 1;
2221 heap_endscan(scan);
2223 ExecDropSingleTupleTableSlot(slot);
2225 FreeExecutorState(estate);
2227 /* These may have been pointing to the now-gone estate */
2228 indexInfo->ii_ExpressionsState = NIL;
2229 indexInfo->ii_PredicateState = NIL;
2234 * IndexGetRelation: given an index's relation OID, get the OID of the
2235 * relation it is an index on. Uses the system cache.
2237 static Oid
2238 IndexGetRelation(Oid indexId)
2240 HeapTuple tuple;
2241 Form_pg_index index;
2242 Oid result;
2244 tuple = SearchSysCache(INDEXRELID,
2245 ObjectIdGetDatum(indexId),
2246 0, 0, 0);
2247 if (!HeapTupleIsValid(tuple))
2248 elog(ERROR, "cache lookup failed for index %u", indexId);
2249 index = (Form_pg_index) GETSTRUCT(tuple);
2250 Assert(index->indexrelid == indexId);
2252 result = index->indrelid;
2253 ReleaseSysCache(tuple);
2254 return result;
2258 * reindex_index - This routine is used to recreate a single index
2260 void
2261 reindex_index(Oid indexId)
2263 Relation iRel,
2264 heapRelation,
2265 pg_index;
2266 Oid heapId;
2267 bool inplace;
2268 HeapTuple indexTuple;
2269 Form_pg_index indexForm;
2272 * Open and lock the parent heap relation. ShareLock is sufficient since
2273 * we only need to be sure no schema or data changes are going on.
2275 heapId = IndexGetRelation(indexId);
2276 heapRelation = heap_open(heapId, ShareLock);
2279 * Open the target index relation and get an exclusive lock on it, to
2280 * ensure that no one else is touching this particular index.
2282 iRel = index_open(indexId, AccessExclusiveLock);
2285 * Don't allow reindex on temp tables of other backends ... their local
2286 * buffer manager is not going to cope.
2288 if (isOtherTempNamespace(RelationGetNamespace(iRel)))
2289 ereport(ERROR,
2290 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2291 errmsg("cannot reindex temporary tables of other sessions")));
2294 * Also check for active uses of the index in the current transaction;
2295 * we don't want to reindex underneath an open indexscan.
2297 CheckTableNotInUse(iRel, "REINDEX INDEX");
2300 * If it's a shared index, we must do inplace processing (because we have
2301 * no way to update relfilenode in other databases). Otherwise we can do
2302 * it the normal transaction-safe way.
2304 * Since inplace processing isn't crash-safe, we only allow it in a
2305 * standalone backend. (In the REINDEX TABLE and REINDEX DATABASE cases,
2306 * the caller should have detected this.)
2308 inplace = iRel->rd_rel->relisshared;
2310 if (inplace && IsUnderPostmaster)
2311 ereport(ERROR,
2312 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2313 errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
2314 RelationGetRelationName(iRel))));
2316 PG_TRY();
2318 IndexInfo *indexInfo;
2320 /* Suppress use of the target index while rebuilding it */
2321 SetReindexProcessing(heapId, indexId);
2323 /* Fetch info needed for index_build */
2324 indexInfo = BuildIndexInfo(iRel);
2326 if (inplace)
2329 * Truncate the actual file (and discard buffers). The indexam
2330 * is responsible for truncating the FSM, if applicable
2332 RelationTruncate(iRel, 0);
2334 else
2337 * We'll build a new physical relation for the index.
2339 setNewRelfilenode(iRel, InvalidTransactionId);
2342 /* Initialize the index and rebuild */
2343 /* Note: we do not need to re-establish pkey setting */
2344 index_build(heapRelation, iRel, indexInfo, false);
2346 PG_CATCH();
2348 /* Make sure flag gets cleared on error exit */
2349 ResetReindexProcessing();
2350 PG_RE_THROW();
2352 PG_END_TRY();
2353 ResetReindexProcessing();
2356 * If the index is marked invalid or not ready (ie, it's from a failed
2357 * CREATE INDEX CONCURRENTLY), we can now mark it valid. This allows
2358 * REINDEX to be used to clean up in such cases.
2360 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
2362 indexTuple = SearchSysCacheCopy(INDEXRELID,
2363 ObjectIdGetDatum(indexId),
2364 0, 0, 0);
2365 if (!HeapTupleIsValid(indexTuple))
2366 elog(ERROR, "cache lookup failed for index %u", indexId);
2367 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2369 if (!indexForm->indisvalid || !indexForm->indisready)
2371 indexForm->indisvalid = true;
2372 indexForm->indisready = true;
2373 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
2374 CatalogUpdateIndexes(pg_index, indexTuple);
2376 heap_close(pg_index, RowExclusiveLock);
2378 /* Close rels, but keep locks */
2379 index_close(iRel, NoLock);
2380 heap_close(heapRelation, NoLock);
2384 * reindex_relation - This routine is used to recreate all indexes
2385 * of a relation (and optionally its toast relation too, if any).
2387 * Returns true if any indexes were rebuilt. Note that a
2388 * CommandCounterIncrement will occur after each index rebuild.
2390 bool
2391 reindex_relation(Oid relid, bool toast_too)
2393 Relation rel;
2394 Oid toast_relid;
2395 bool is_pg_class;
2396 bool result;
2397 List *indexIds,
2398 *doneIndexes;
2399 ListCell *indexId;
2402 * Open and lock the relation. ShareLock is sufficient since we only need
2403 * to prevent schema and data changes in it.
2405 rel = heap_open(relid, ShareLock);
2407 toast_relid = rel->rd_rel->reltoastrelid;
2410 * Get the list of index OIDs for this relation. (We trust to the
2411 * relcache to get this with a sequential scan if ignoring system
2412 * indexes.)
2414 indexIds = RelationGetIndexList(rel);
2417 * reindex_index will attempt to update the pg_class rows for the relation
2418 * and index. If we are processing pg_class itself, we want to make sure
2419 * that the updates do not try to insert index entries into indexes we
2420 * have not processed yet. (When we are trying to recover from corrupted
2421 * indexes, that could easily cause a crash.) We can accomplish this
2422 * because CatalogUpdateIndexes will use the relcache's index list to know
2423 * which indexes to update. We just force the index list to be only the
2424 * stuff we've processed.
2426 * It is okay to not insert entries into the indexes we have not processed
2427 * yet because all of this is transaction-safe. If we fail partway
2428 * through, the updated rows are dead and it doesn't matter whether they
2429 * have index entries. Also, a new pg_class index will be created with an
2430 * entry for its own pg_class row because we do setNewRelfilenode() before
2431 * we do index_build().
2433 * Note that we also clear pg_class's rd_oidindex until the loop is done,
2434 * so that that index can't be accessed either. This means we cannot
2435 * safely generate new relation OIDs while in the loop; shouldn't be a
2436 * problem.
2438 is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
2440 /* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
2441 if (is_pg_class)
2442 (void) RelationGetIndexAttrBitmap(rel);
2444 /* Reindex all the indexes. */
2445 doneIndexes = NIL;
2446 foreach(indexId, indexIds)
2448 Oid indexOid = lfirst_oid(indexId);
2450 if (is_pg_class)
2451 RelationSetIndexList(rel, doneIndexes, InvalidOid);
2453 reindex_index(indexOid);
2455 CommandCounterIncrement();
2457 if (is_pg_class)
2458 doneIndexes = lappend_oid(doneIndexes, indexOid);
2461 if (is_pg_class)
2462 RelationSetIndexList(rel, indexIds, ClassOidIndexId);
2465 * Close rel, but continue to hold the lock.
2467 heap_close(rel, NoLock);
2469 result = (indexIds != NIL);
2472 * If the relation has a secondary toast rel, reindex that too while we
2473 * still hold the lock on the master table.
2475 if (toast_too && OidIsValid(toast_relid))
2476 result |= reindex_relation(toast_relid, false);
2478 return result;