/*-------------------------------------------------------------------------
 *
 * code to create and destroy POSTGRES index relations
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *		index_create()			- Create a cataloged index relation
 *		index_drop()			- Removes index relation from catalogs
 *		BuildIndexInfo()		- Prepare to insert index tuples
 *		FormIndexDatum()		- Construct datum vector for one index tuple
 *
 *-------------------------------------------------------------------------
 */
#include "access/genam.h"
#include "access/heapam.h"
#include "access/relscan.h"
#include "access/sysattr.h"
#include "access/transam.h"
#include "access/xact.h"
#include "bootstrap/bootstrap.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/heap.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "commands/tablecmds.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/var.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/procarray.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/relcache.h"
#include "utils/syscache.h"
#include "utils/tuplesort.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"
/* state info for validate_index bulkdelete callback */
typedef struct
{
    Tuplesortstate *tuplesort;  /* for sorting the index TIDs */
    /* statistics (for debug purposes only): */
    double      htups,
                itups,
                tups_inserted;
} v_i_state;
/* non-export function prototypes */
static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
                         IndexInfo *indexInfo,
                         Oid accessMethodObjectId,
                         Oid *classObjectId);
static void InitializeAttributeOids(Relation indexRelation,
                        int numatts, Oid indexoid);
static void AppendAttributeTuples(Relation indexRelation, int numatts);
static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
                    IndexInfo *indexInfo,
                    Oid *classOids,
                    int16 *coloptions,
                    bool primary,
                    bool isvalid);
static void index_update_stats(Relation rel, bool hasindex, bool isprimary,
                   Oid reltoastidxid, double reltuples);
static bool validate_index_callback(ItemPointer itemptr, void *opaque);
static void validate_index_heapscan(Relation heapRelation,
                        Relation indexRelation,
                        IndexInfo *indexInfo,
                        Snapshot snapshot,
                        v_i_state *state);
static Oid  IndexGetRelation(Oid indexId);
/*
 *		ConstructTupleDescriptor
 *
 * Build an index tuple descriptor for a new index
 */
static TupleDesc
ConstructTupleDescriptor(Relation heapRelation,
                         IndexInfo *indexInfo,
                         Oid accessMethodObjectId,
                         Oid *classObjectId)
{
    int         numatts = indexInfo->ii_NumIndexAttrs;
    ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
    HeapTuple   amtuple;
    Form_pg_am  amform;
    TupleDesc   heapTupDesc;
    TupleDesc   indexTupDesc;
    int         natts;          /* #atts in heap rel --- for error checks */
    int         i;

    /* We need access to the index AM's pg_am tuple */
    amtuple = SearchSysCache(AMOID,
                             ObjectIdGetDatum(accessMethodObjectId),
                             0, 0, 0);
    if (!HeapTupleIsValid(amtuple))
        elog(ERROR, "cache lookup failed for access method %u",
             accessMethodObjectId);
    amform = (Form_pg_am) GETSTRUCT(amtuple);

    /* ... and to the table's tuple descriptor */
    heapTupDesc = RelationGetDescr(heapRelation);
    natts = RelationGetForm(heapRelation)->relnatts;

    /*
     * allocate the new tuple descriptor
     */
    indexTupDesc = CreateTemplateTupleDesc(numatts, false);

    /*
     * For simple index columns, we copy the pg_attribute row from the parent
     * relation and modify it as necessary.  For expressions we have to cons
     * up a pg_attribute row the hard way.
     */
    for (i = 0; i < numatts; i++)
    {
        AttrNumber  atnum = indexInfo->ii_KeyAttrNumbers[i];
        Form_pg_attribute to = indexTupDesc->attrs[i];
        HeapTuple   tuple;
        Form_pg_type typeTup;
        Form_pg_opclass opclassTup;
        Oid         keyType;

        if (atnum != 0)
        {
            /* Simple index column */
            Form_pg_attribute from;

            if (atnum < 0)
            {
                /*
                 * here we are indexing on a system attribute (-1...-n)
                 */
                from = SystemAttributeDefinition(atnum,
                                       heapRelation->rd_rel->relhasoids);
            }
            else
            {
                /*
                 * here we are indexing on a normal attribute (1...n)
                 */
                if (atnum > natts)      /* safety check */
                    elog(ERROR, "invalid column number %d", atnum);
                from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
            }

            /*
             * now that we've determined the "from", let's copy the tuple desc
             */
            memcpy(to, from, ATTRIBUTE_TUPLE_SIZE);

            /*
             * Fix the stuff that should not be the same as the underlying
             * attribute
             */
            to->attstattarget = -1;
            to->attcacheoff = -1;
            to->attnotnull = false;
            to->atthasdef = false;
            to->attislocal = true;
        }
        else
        {
            /* Expressional index */
            Node       *indexkey;

            MemSet(to, 0, ATTRIBUTE_TUPLE_SIZE);

            if (indexpr_item == NULL)   /* shouldn't happen */
                elog(ERROR, "too few entries in indexprs list");
            indexkey = (Node *) lfirst(indexpr_item);
            indexpr_item = lnext(indexpr_item);

            /*
             * Make the attribute's name "pg_expression_nnn" (maybe think of
             * something better later)
             */
            sprintf(NameStr(to->attname), "pg_expression_%d", i + 1);

            /*
             * Lookup the expression type in pg_type for the type length etc.
             */
            keyType = exprType(indexkey);
            tuple = SearchSysCache(TYPEOID,
                                   ObjectIdGetDatum(keyType),
                                   0, 0, 0);
            if (!HeapTupleIsValid(tuple))
                elog(ERROR, "cache lookup failed for type %u", keyType);
            typeTup = (Form_pg_type) GETSTRUCT(tuple);

            /*
             * Assign some of the attributes values. Leave the rest as 0.
             */
            to->atttypid = keyType;
            to->attlen = typeTup->typlen;
            to->attbyval = typeTup->typbyval;
            to->attstorage = typeTup->typstorage;
            to->attalign = typeTup->typalign;
            to->attstattarget = -1;
            to->attcacheoff = -1;
            to->attislocal = true;

            ReleaseSysCache(tuple);
        }

        /*
         * We do not yet have the correct relation OID for the index, so just
         * set it invalid for now.  InitializeAttributeOids() will fix it
         * later.
         */
        to->attrelid = InvalidOid;

        /*
         * Check the opclass and index AM to see if either provides a keytype
         * (overriding the attribute type).  Opclass takes precedence.
         */
        tuple = SearchSysCache(CLAOID,
                               ObjectIdGetDatum(classObjectId[i]),
                               0, 0, 0);
        if (!HeapTupleIsValid(tuple))
            elog(ERROR, "cache lookup failed for opclass %u",
                 classObjectId[i]);
        opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
        if (OidIsValid(opclassTup->opckeytype))
            keyType = opclassTup->opckeytype;
        else
            keyType = amform->amkeytype;
        ReleaseSysCache(tuple);

        if (OidIsValid(keyType) && keyType != to->atttypid)
        {
            /* index value and heap value have different types */
            tuple = SearchSysCache(TYPEOID,
                                   ObjectIdGetDatum(keyType),
                                   0, 0, 0);
            if (!HeapTupleIsValid(tuple))
                elog(ERROR, "cache lookup failed for type %u", keyType);
            typeTup = (Form_pg_type) GETSTRUCT(tuple);

            to->atttypid = keyType;
            to->attlen = typeTup->typlen;
            to->attbyval = typeTup->typbyval;
            to->attalign = typeTup->typalign;
            to->attstorage = typeTup->typstorage;

            ReleaseSysCache(tuple);
        }
    }

    ReleaseSysCache(amtuple);

    return indexTupDesc;
}
/* ----------------------------------------------------------------
 *		InitializeAttributeOids
 * ----------------------------------------------------------------
 */
static void
InitializeAttributeOids(Relation indexRelation,
                        int numatts,
                        Oid indexoid)
{
    TupleDesc   tupleDescriptor;
    int         i;

    tupleDescriptor = RelationGetDescr(indexRelation);

    for (i = 0; i < numatts; i += 1)
        tupleDescriptor->attrs[i]->attrelid = indexoid;
}
/* ----------------------------------------------------------------
 *		AppendAttributeTuples
 * ----------------------------------------------------------------
 */
static void
AppendAttributeTuples(Relation indexRelation, int numatts)
{
    Relation    pg_attribute;
    CatalogIndexState indstate;
    TupleDesc   indexTupDesc;
    HeapTuple   new_tuple;
    int         i;

    /*
     * open the attribute relation and its indexes
     */
    pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);

    indstate = CatalogOpenIndexes(pg_attribute);

    /*
     * insert data from new index's tupdesc into pg_attribute
     */
    indexTupDesc = RelationGetDescr(indexRelation);

    for (i = 0; i < numatts; i++)
    {
        /*
         * There used to be very grotty code here to set these fields, but I
         * think it's unnecessary.  They should be set already.
         */
        Assert(indexTupDesc->attrs[i]->attnum == i + 1);
        Assert(indexTupDesc->attrs[i]->attcacheoff == -1);

        new_tuple = heap_addheader(Natts_pg_attribute,
                                   false,
                                   ATTRIBUTE_TUPLE_SIZE,
                                   (void *) indexTupDesc->attrs[i]);

        simple_heap_insert(pg_attribute, new_tuple);

        CatalogIndexInsert(indstate, new_tuple);

        heap_freetuple(new_tuple);
    }

    CatalogCloseIndexes(indstate);

    heap_close(pg_attribute, RowExclusiveLock);
}
/* ----------------------------------------------------------------
 *		UpdateIndexRelation
 *
 * Construct and insert a new entry in the pg_index catalog
 * ----------------------------------------------------------------
 */
static void
UpdateIndexRelation(Oid indexoid,
                    Oid heapoid,
                    IndexInfo *indexInfo,
                    Oid *classOids,
                    int16 *coloptions,
                    bool primary,
                    bool isvalid)
{
    int2vector *indkey;
    oidvector  *indclass;
    int2vector *indoption;
    Datum       exprsDatum;
    Datum       predDatum;
    Datum       values[Natts_pg_index];
    char        nulls[Natts_pg_index];
    Relation    pg_index;
    HeapTuple   tuple;
    int         i;

    /*
     * Copy the index key, opclass, and indoption info into arrays (should we
     * make the caller pass them like this to start with?)
     */
    indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
    for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
        indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
    indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
    indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);

    /*
     * Convert the index expressions (if any) to a text datum
     */
    if (indexInfo->ii_Expressions != NIL)
    {
        char       *exprsString;

        exprsString = nodeToString(indexInfo->ii_Expressions);
        exprsDatum = CStringGetTextDatum(exprsString);
    }
    else
        exprsDatum = (Datum) 0;

    /*
     * Convert the index predicate (if any) to a text datum.  Note we convert
     * implicit-AND format to normal explicit-AND for storage.
     */
    if (indexInfo->ii_Predicate != NIL)
    {
        char       *predString;

        predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
        predDatum = CStringGetTextDatum(predString);
    }
    else
        predDatum = (Datum) 0;

    /*
     * open the system catalog index relation
     */
    pg_index = heap_open(IndexRelationId, RowExclusiveLock);

    /*
     * Build a pg_index tuple
     */
    MemSet(nulls, ' ', sizeof(nulls));

    values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
    values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
    values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
    values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
    values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
    values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
    values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
    values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
    /* we set isvalid and isready the same way */
    values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
    values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
    values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
    values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
    values[Anum_pg_index_indexprs - 1] = exprsDatum;
    if (exprsDatum == (Datum) 0)
        nulls[Anum_pg_index_indexprs - 1] = 'n';
    values[Anum_pg_index_indpred - 1] = predDatum;
    if (predDatum == (Datum) 0)
        nulls[Anum_pg_index_indpred - 1] = 'n';

    tuple = heap_formtuple(RelationGetDescr(pg_index), values, nulls);

    /*
     * insert the tuple into the pg_index catalog
     */
    simple_heap_insert(pg_index, tuple);

    /* update the indexes on pg_index */
    CatalogUpdateIndexes(pg_index, tuple);

    /*
     * close the relation and free the tuple
     */
    heap_close(pg_index, RowExclusiveLock);
    heap_freetuple(tuple);
}
/*
 * index_create
 *
 * heapRelationId: OID of table to build index on
 * indexRelationName: what it says
 * indexRelationId: normally, pass InvalidOid to let this routine
 *		generate an OID for the index.  During bootstrap this may be
 *		nonzero to specify a preselected OID.
 * indexInfo: same info executor uses to insert into the index
 * accessMethodObjectId: OID of index AM to use
 * tableSpaceId: OID of tablespace to use
 * classObjectId: array of index opclass OIDs, one per index column
 * coloptions: array of per-index-column indoption settings
 * reloptions: AM-specific options
 * isprimary: index is a PRIMARY KEY
 * isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint
 * allow_system_table_mods: allow table to be a system catalog
 * skip_build: true to skip the index_build() step for the moment; caller
 *		must do it later (typically via reindex_index())
 * concurrent: if true, do not lock the table against writers.  The index
 *		will be marked "invalid" and the caller must take additional steps
 *		to fix it up.
 *
 * Returns OID of the created index.
 */
Oid
index_create(Oid heapRelationId,
             const char *indexRelationName,
             Oid indexRelationId,
             IndexInfo *indexInfo,
             Oid accessMethodObjectId,
             Oid tableSpaceId,
             Oid *classObjectId,
             int16 *coloptions,
             Datum reloptions,
             bool isprimary,
             bool isconstraint,
             bool allow_system_table_mods,
             bool skip_build,
             bool concurrent)
{
    Relation    pg_class;
    Relation    heapRelation;
    Relation    indexRelation;
    TupleDesc   indexTupDesc;
    bool        shared_relation;
    Oid         namespaceId;
    int         i;

    pg_class = heap_open(RelationRelationId, RowExclusiveLock);

    /*
     * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
     * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
     */
    heapRelation = heap_open(heapRelationId,
                             (concurrent ? ShareUpdateExclusiveLock : ShareLock));

    /*
     * The index will be in the same namespace as its parent table, and is
     * shared across databases if and only if the parent is.
     */
    namespaceId = RelationGetNamespace(heapRelation);
    shared_relation = heapRelation->rd_rel->relisshared;

    if (indexInfo->ii_NumIndexAttrs < 1)
        elog(ERROR, "must index at least one column");

    if (!allow_system_table_mods &&
        IsSystemRelation(heapRelation) &&
        IsNormalProcessingMode())
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("user-defined indexes on system catalog tables are not supported")));

    /*
     * concurrent index build on a system catalog is unsafe because we tend to
     * release locks before committing in catalogs
     */
    if (concurrent &&
        IsSystemRelation(heapRelation))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("concurrent index creation on system catalog tables is not supported")));

    /*
     * We cannot allow indexing a shared relation after initdb (because
     * there's no way to make the entry in other databases' pg_class).
     */
    if (shared_relation && !IsBootstrapProcessingMode())
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("shared indexes cannot be created after initdb")));

    /*
     * Validate shared/non-shared tablespace (must check this before doing
     * GetNewRelFileNode, to prevent Assert therein)
     */
    if (shared_relation)
    {
        if (tableSpaceId != GLOBALTABLESPACE_OID)
            /* elog since this is not a user-facing error */
            elog(ERROR,
                 "shared relations must be placed in pg_global tablespace");
    }
    else
    {
        if (tableSpaceId == GLOBALTABLESPACE_OID)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("only shared relations can be placed in pg_global tablespace")));
    }

    if (get_relname_relid(indexRelationName, namespaceId))
        ereport(ERROR,
                (errcode(ERRCODE_DUPLICATE_TABLE),
                 errmsg("relation \"%s\" already exists",
                        indexRelationName)));

    /*
     * construct tuple descriptor for index tuples
     */
    indexTupDesc = ConstructTupleDescriptor(heapRelation,
                                            indexInfo,
                                            accessMethodObjectId,
                                            classObjectId);

    /*
     * Allocate an OID for the index, unless we were told what to use.
     *
     * The OID will be the relfilenode as well, so make sure it doesn't
     * collide with either pg_class OIDs or existing physical files.
     */
    if (!OidIsValid(indexRelationId))
        indexRelationId = GetNewRelFileNode(tableSpaceId, shared_relation,
                                            pg_class);

    /*
     * create the index relation's relcache entry and physical disk file. (If
     * we fail further down, it's the smgr's responsibility to remove the disk
     * file again.)
     */
    indexRelation = heap_create(indexRelationName,
                                namespaceId,
                                tableSpaceId,
                                indexRelationId,
                                indexTupDesc,
                                RELKIND_INDEX,
                                shared_relation,
                                allow_system_table_mods);

    Assert(indexRelationId == RelationGetRelid(indexRelation));

    /*
     * Obtain exclusive lock on it.  Although no other backends can see it
     * until we commit, this prevents deadlock-risk complaints from lock
     * manager in cases such as CLUSTER.
     */
    LockRelation(indexRelation, AccessExclusiveLock);

    /*
     * Fill in fields of the index's pg_class entry that are not set correctly
     * by heap_create.
     *
     * XXX should have a cleaner way to create cataloged indexes
     */
    indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
    indexRelation->rd_rel->relam = accessMethodObjectId;
    indexRelation->rd_rel->relkind = RELKIND_INDEX;
    indexRelation->rd_rel->relhasoids = false;

    /*
     * store index's pg_class entry
     */
    InsertPgClassTuple(pg_class, indexRelation,
                       RelationGetRelid(indexRelation));

    /* done with pg_class */
    heap_close(pg_class, RowExclusiveLock);

    /*
     * now update the object id's of all the attribute tuple forms in the
     * index relation's tuple descriptor
     */
    InitializeAttributeOids(indexRelation,
                            indexInfo->ii_NumIndexAttrs,
                            indexRelationId);

    /*
     * append ATTRIBUTE tuples for the index
     */
    AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);

    /*
     * (append INDEX tuple)
     *
     * Note that this stows away a representation of "predicate".
     * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
     */
    UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
                        classObjectId, coloptions, isprimary, !concurrent);

    /*
     * Register constraint and dependencies for the index.
     *
     * If the index is from a CONSTRAINT clause, construct a pg_constraint
     * entry.  The index is then linked to the constraint, which in turn is
     * linked to the table.  If it's not a CONSTRAINT, make the dependency
     * directly on the table.
     *
     * We don't need a dependency on the namespace, because there'll be an
     * indirect dependency via our parent table.
     *
     * During bootstrap we can't register any dependencies, and we don't try
     * to make a constraint either.
     */
    if (!IsBootstrapProcessingMode())
    {
        ObjectAddress myself,
                    referenced;

        myself.classId = RelationRelationId;
        myself.objectId = indexRelationId;
        myself.objectSubId = 0;

        if (isconstraint)
        {
            char        constraintType;
            Oid         conOid;

            if (isprimary)
                constraintType = CONSTRAINT_PRIMARY;
            else if (indexInfo->ii_Unique)
                constraintType = CONSTRAINT_UNIQUE;
            else
            {
                elog(ERROR, "constraint must be PRIMARY or UNIQUE");
                constraintType = 0;     /* keep compiler quiet */
            }

            /* Shouldn't have any expressions */
            if (indexInfo->ii_Expressions)
                elog(ERROR, "constraints cannot have index expressions");

            conOid = CreateConstraintEntry(indexRelationName,
                                           false,       /* isDeferrable */
                                           false,       /* isDeferred */
                                           indexInfo->ii_KeyAttrNumbers,
                                           indexInfo->ii_NumIndexAttrs,
                                           InvalidOid,  /* no domain */
                                           InvalidOid,  /* no foreign key */
                                           InvalidOid,  /* no associated index */
                                           NULL);       /* no check constraint */

            referenced.classId = ConstraintRelationId;
            referenced.objectId = conOid;
            referenced.objectSubId = 0;

            recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
        }
        else
        {
            bool        have_simple_col = false;

            /* Create auto dependencies on simply-referenced columns */
            for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
            {
                if (indexInfo->ii_KeyAttrNumbers[i] != 0)
                {
                    referenced.classId = RelationRelationId;
                    referenced.objectId = heapRelationId;
                    referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];

                    recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);

                    have_simple_col = true;
                }
            }

            /*
             * It's possible for an index to not depend on any columns of the
             * table at all, in which case we need to give it a dependency on
             * the table as a whole; else it won't get dropped when the table
             * is dropped.  This edge case is not totally useless; for
             * example, a unique index on a constant expression can serve to
             * prevent a table from containing more than one row.
             */
            if (!have_simple_col &&
                !contain_vars_of_level((Node *) indexInfo->ii_Expressions, 0) &&
                !contain_vars_of_level((Node *) indexInfo->ii_Predicate, 0))
            {
                referenced.classId = RelationRelationId;
                referenced.objectId = heapRelationId;
                referenced.objectSubId = 0;

                recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
            }
        }

        /* Store dependency on operator classes */
        for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
        {
            referenced.classId = OperatorClassRelationId;
            referenced.objectId = classObjectId[i];
            referenced.objectSubId = 0;

            recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
        }

        /* Store dependencies on anything mentioned in index expressions */
        if (indexInfo->ii_Expressions)
        {
            recordDependencyOnSingleRelExpr(&myself,
                                            (Node *) indexInfo->ii_Expressions,
                                            heapRelationId,
                                            DEPENDENCY_NORMAL,
                                            DEPENDENCY_AUTO);
        }

        /* Store dependencies on anything mentioned in predicate */
        if (indexInfo->ii_Predicate)
        {
            recordDependencyOnSingleRelExpr(&myself,
                                            (Node *) indexInfo->ii_Predicate,
                                            heapRelationId,
                                            DEPENDENCY_NORMAL,
                                            DEPENDENCY_AUTO);
        }
    }

    /*
     * Advance the command counter so that we can see the newly-entered
     * catalog tuples for the index.
     */
    CommandCounterIncrement();

    /*
     * In bootstrap mode, we have to fill in the index strategy structure with
     * information from the catalogs.  If we aren't bootstrapping, then the
     * relcache entry has already been rebuilt thanks to sinval update during
     * CommandCounterIncrement.
     */
    if (IsBootstrapProcessingMode())
        RelationInitIndexAccessInfo(indexRelation);
    else
        Assert(indexRelation->rd_indexcxt != NULL);

    /*
     * If this is bootstrap (initdb) time, then we don't actually fill in the
     * index yet.  We'll be creating more indexes and classes later, so we
     * delay filling them in until just before we're done with bootstrapping.
     * Similarly, if the caller specified skip_build then filling the index is
     * delayed till later (ALTER TABLE can save work in some cases with this).
     * Otherwise, we call the AM routine that constructs the index.
     */
    if (IsBootstrapProcessingMode())
    {
        index_register(heapRelationId, indexRelationId, indexInfo);
    }
    else if (skip_build)
    {
        /*
         * Caller is responsible for filling the index later on.  However,
         * we'd better make sure that the heap relation is correctly marked as
         * having an index.
         */
        index_update_stats(heapRelation,
                           true,
                           isprimary,
                           InvalidOid,
                           heapRelation->rd_rel->reltuples);
        /* Make the above update visible */
        CommandCounterIncrement();
    }
    else
    {
        index_build(heapRelation, indexRelation, indexInfo, isprimary);
    }

    /*
     * Close the heap and index; but we keep the locks that we acquired above
     * until end of transaction.
     */
    index_close(indexRelation, NoLock);
    heap_close(heapRelation, NoLock);

    return indexRelationId;
}
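
/*
 * Illustrative sketch (not part of the original file): a minimal call to
 * index_create() for a plain, non-constraint, single-column index, following
 * the parameter list documented above.  The OIDs passed in (heap_oid,
 * am_oid, opclass_oid) and the helper name are placeholder assumptions the
 * caller would normally have looked up beforehand.
 */
#ifdef NOT_USED
static Oid
example_create_simple_index(Oid heap_oid, Oid am_oid, Oid opclass_oid)
{
    IndexInfo  *indexInfo = makeNode(IndexInfo);
    Oid         classObjectId[1];
    int16       coloptions[1];

    /* one simple column, no expressions or predicate */
    indexInfo->ii_NumIndexAttrs = 1;
    indexInfo->ii_KeyAttrNumbers[0] = 1;    /* first heap column */
    indexInfo->ii_Expressions = NIL;
    indexInfo->ii_ExpressionsState = NIL;
    indexInfo->ii_Predicate = NIL;
    indexInfo->ii_PredicateState = NIL;
    indexInfo->ii_Unique = false;
    indexInfo->ii_ReadyForInserts = true;
    indexInfo->ii_Concurrent = false;
    indexInfo->ii_BrokenHotChain = false;

    classObjectId[0] = opclass_oid;
    coloptions[0] = 0;

    return index_create(heap_oid,
                        "example_idx",
                        InvalidOid,     /* let index_create choose the OID */
                        indexInfo,
                        am_oid,
                        InvalidOid,     /* default tablespace */
                        classObjectId,
                        coloptions,
                        (Datum) 0,      /* no reloptions */
                        false,          /* not a primary key */
                        false,          /* not a constraint */
                        false,          /* no system table mods */
                        false,          /* do the build now */
                        false);         /* not concurrent */
}
#endif   /* NOT_USED */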
/*
 *		index_drop
 *
 * NOTE: this routine should now only be called through performDeletion(),
 * else associated dependencies won't be cleaned up.
 */
void
index_drop(Oid indexId)
{
    Oid         heapId;
    Relation    userHeapRelation;
    Relation    userIndexRelation;
    Relation    indexRelation;
    HeapTuple   tuple;
    bool        hasexprs;
    ForkNumber  forknum;

    /*
     * To drop an index safely, we must grab exclusive lock on its parent
     * table; otherwise there could be other backends using the index!
     * Exclusive lock on the index alone is insufficient because another
     * backend might be in the midst of devising a query plan that will use
     * the index.  The parser and planner take care to hold an appropriate
     * lock on the parent table while working, but having them hold locks on
     * all the indexes too seems overly expensive.  We do grab exclusive lock
     * on the index too, just to be safe. Both locks must be held till end of
     * transaction, else other backends will still see this index in pg_index.
     */
    heapId = IndexGetRelation(indexId);
    userHeapRelation = heap_open(heapId, AccessExclusiveLock);

    userIndexRelation = index_open(indexId, AccessExclusiveLock);

    /*
     * Schedule physical removal of the files
     */
    RelationOpenSmgr(userIndexRelation);
    for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
        if (smgrexists(userIndexRelation->rd_smgr, forknum))
            smgrscheduleunlink(userIndexRelation->rd_smgr, forknum,
                               userIndexRelation->rd_istemp);
    RelationCloseSmgr(userIndexRelation);

    /*
     * Close and flush the index's relcache entry, to ensure relcache doesn't
     * try to rebuild it while we're deleting catalog entries.  We keep the
     * lock though.
     */
    index_close(userIndexRelation, NoLock);

    RelationForgetRelation(indexId);

    /*
     * fix INDEX relation, and check for expressional index
     */
    indexRelation = heap_open(IndexRelationId, RowExclusiveLock);

    tuple = SearchSysCache(INDEXRELID,
                           ObjectIdGetDatum(indexId),
                           0, 0, 0);
    if (!HeapTupleIsValid(tuple))
        elog(ERROR, "cache lookup failed for index %u", indexId);

    hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);

    simple_heap_delete(indexRelation, &tuple->t_self);

    ReleaseSysCache(tuple);
    heap_close(indexRelation, RowExclusiveLock);

    /*
     * if it has any expression columns, we might have stored statistics about
     * them.
     */
    if (hasexprs)
        RemoveStatistics(indexId, 0);

    /*
     * fix ATTRIBUTE relation
     */
    DeleteAttributeTuples(indexId);

    /*
     * fix RELATION relation
     */
    DeleteRelationTuple(indexId);

    /*
     * We are presently too lazy to attempt to compute the new correct value
     * of relhasindex (the next VACUUM will fix it if necessary).  So there is
     * no need to update the pg_class tuple for the owning relation. But we
     * must send out a shared-cache-inval notice on the owning relation to
     * ensure other backends update their relcache lists of indexes.
     */
    CacheInvalidateRelcache(userHeapRelation);

    /*
     * Close owning rel, but keep lock
     */
    heap_close(userHeapRelation, NoLock);
}
/* ----------------------------------------------------------------
 *						index_build support
 * ----------------------------------------------------------------
 */

/* ----------------
 *		BuildIndexInfo
 *			Construct an IndexInfo record for an open index
 *
 * IndexInfo stores the information about the index that's needed by
 * FormIndexDatum, which is used for both index_build() and later insertion
 * of individual index tuples.  Normally we build an IndexInfo for an index
 * just once per command, and then use it for (potentially) many tuples.
 * ----------------
 */
IndexInfo *
BuildIndexInfo(Relation index)
{
    IndexInfo  *ii = makeNode(IndexInfo);
    Form_pg_index indexStruct = index->rd_index;
    int         i;
    int         numKeys;

    /* check the number of keys, and copy attr numbers into the IndexInfo */
    numKeys = indexStruct->indnatts;
    if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
        elog(ERROR, "invalid indnatts %d for index %u",
             numKeys, RelationGetRelid(index));
    ii->ii_NumIndexAttrs = numKeys;
    for (i = 0; i < numKeys; i++)
        ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];

    /* fetch any expressions needed for expressional indexes */
    ii->ii_Expressions = RelationGetIndexExpressions(index);
    ii->ii_ExpressionsState = NIL;

    /* fetch index predicate if any */
    ii->ii_Predicate = RelationGetIndexPredicate(index);
    ii->ii_PredicateState = NIL;

    ii->ii_Unique = indexStruct->indisunique;
    ii->ii_ReadyForInserts = indexStruct->indisready;

    /* initialize index-build state to default */
    ii->ii_Concurrent = false;
    ii->ii_BrokenHotChain = false;

    return ii;
}
/*
 *		FormIndexDatum
 *			Construct values[] and isnull[] arrays for a new index tuple.
 *
 *	indexInfo		Info about the index
 *	slot			Heap tuple for which we must prepare an index entry
 *	estate			executor state for evaluating any index expressions
 *	values			Array of index Datums (output area)
 *	isnull			Array of is-null indicators (output area)
 *
 * When there are no index expressions, estate may be NULL.  Otherwise it
 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
 * context must point to the heap tuple passed in.
 *
 * Notice we don't actually call index_form_tuple() here; we just prepare
 * its input arrays values[] and isnull[].  This is because the index AM
 * may wish to alter the data before storage.
 */
void
FormIndexDatum(IndexInfo *indexInfo,
               TupleTableSlot *slot,
               EState *estate,
               Datum *values,
               bool *isnull)
{
    ListCell   *indexpr_item;
    int         i;

    if (indexInfo->ii_Expressions != NIL &&
        indexInfo->ii_ExpressionsState == NIL)
    {
        /* First time through, set up expression evaluation state */
        indexInfo->ii_ExpressionsState = (List *)
            ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
                            estate);
        /* Check caller has set up context correctly */
        Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
    }
    indexpr_item = list_head(indexInfo->ii_ExpressionsState);

    for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
    {
        int         keycol = indexInfo->ii_KeyAttrNumbers[i];
        Datum       iDatum;
        bool        isNull;

        if (keycol != 0)
        {
            /*
             * Plain index column; get the value we need directly from the
             * heap tuple.
             */
            iDatum = slot_getattr(slot, keycol, &isNull);
        }
        else
        {
            /*
             * Index expression --- need to evaluate it.
             */
            if (indexpr_item == NULL)
                elog(ERROR, "wrong number of index expressions");
            iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
                                               GetPerTupleExprContext(estate),
                                               &isNull,
                                               NULL);
            indexpr_item = lnext(indexpr_item);
        }
        values[i] = iDatum;
        isnull[i] = isNull;
    }

    if (indexpr_item != NULL)
        elog(ERROR, "wrong number of index expressions");
}
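
/*
 * Illustrative sketch (not part of the original file): one plausible way a
 * caller could combine BuildIndexInfo() and FormIndexDatum() to prepare and
 * insert a single index entry for an existing heap tuple.  The helper name
 * is an assumption for illustration only.
 */
#ifdef NOT_USED
static void
example_insert_one_index_entry(Relation heapRel, Relation indexRel,
                               HeapTuple heapTuple)
{
    IndexInfo  *indexInfo = BuildIndexInfo(indexRel);
    EState     *estate = CreateExecutorState();
    TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel));
    Datum       values[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];

    /* FormIndexDatum requires ecxt_scantuple to point at the tuple under test */
    GetPerTupleExprContext(estate)->ecxt_scantuple = slot;
    ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

    /* extract the index columns, evaluating any index expressions */
    FormIndexDatum(indexInfo, slot, estate, values, isnull);

    /* hand the datum arrays and the heap TID to the index AM */
    index_insert(indexRel, values, isnull, &heapTuple->t_self,
                 heapRel, indexInfo->ii_Unique);

    ExecDropSingleTupleTableSlot(slot);
    FreeExecutorState(estate);
}
#endif   /* NOT_USED */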
/*
 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
 *
 * This routine updates the pg_class row of either an index or its parent
 * relation after CREATE INDEX or REINDEX.  Its rather bizarre API is designed
 * to ensure we can do all the necessary work in just one update.
 *
 * hasindex: set relhasindex to this value
 * isprimary: if true, set relhaspkey true; else no change
 * reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
 * reltuples: set reltuples to this value
 *
 * relpages is also updated (using RelationGetNumberOfBlocks()).
 *
 * NOTE: an important side-effect of this operation is that an SI invalidation
 * message is sent out to all backends --- including me --- causing relcache
 * entries to be flushed or updated with the new data.  This must happen even
 * if we find that no change is needed in the pg_class row.  When updating
 * a heap entry, this ensures that other backends find out about the new
 * index.  When updating an index, it's important because some index AMs
 * expect a relcache flush to occur after REINDEX.
 */
static void
index_update_stats(Relation rel, bool hasindex, bool isprimary,
                   Oid reltoastidxid, double reltuples)
{
    BlockNumber relpages = RelationGetNumberOfBlocks(rel);
    Oid         relid = RelationGetRelid(rel);
    Relation    pg_class;
    HeapTuple   tuple;
    Form_pg_class rd_rel;
    bool        dirty;

    /*
     * We always update the pg_class row using a non-transactional,
     * overwrite-in-place update.  There are several reasons for this:
     *
     * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
     *
     * 2. We could be reindexing pg_class itself, in which case we can't move
     * its pg_class row because CatalogUpdateIndexes might not know about all
     * the indexes yet (see reindex_relation).
     *
     * 3. Because we execute CREATE INDEX with just share lock on the parent
     * rel (to allow concurrent index creations), an ordinary update could
     * suffer a tuple-concurrently-updated failure against another CREATE
     * INDEX committing at about the same time.  We can avoid that by having
     * them both do nontransactional updates (we assume they will both be
     * trying to change the pg_class row to the same thing, so it doesn't
     * matter which goes first).
     *
     * 4. Even with just a single CREATE INDEX, there's a risk factor because
     * someone else might be trying to open the rel while we commit, and this
     * creates a race condition as to whether he will see both or neither of
     * the pg_class row versions as valid.  Again, a non-transactional update
     * avoids the risk.  It is indeterminate which state of the row the other
     * process will see, but it doesn't matter (if he's only taking
     * AccessShareLock, then it's not critical that he see relhasindex true).
     *
     * It is safe to use a non-transactional update even though our
     * transaction could still fail before committing.  Setting relhasindex
     * true is safe even if there are no indexes (VACUUM will eventually fix
     * it), and of course the relpages and reltuples counts are correct (or at
     * least more so than the old values) regardless.
     */

    pg_class = heap_open(RelationRelationId, RowExclusiveLock);

    /*
     * Make a copy of the tuple to update.  Normally we use the syscache, but
     * we can't rely on that during bootstrap or while reindexing pg_class
     * itself.
     */
    if (IsBootstrapProcessingMode() ||
        ReindexIsProcessingHeap(RelationRelationId))
    {
        /* don't assume syscache will work */
        HeapScanDesc pg_class_scan;
        ScanKeyData key[1];

        ScanKeyInit(&key[0],
                    ObjectIdAttributeNumber,
                    BTEqualStrategyNumber, F_OIDEQ,
                    ObjectIdGetDatum(relid));

        pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
        tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
        tuple = heap_copytuple(tuple);
        heap_endscan(pg_class_scan);
    }
    else
    {
        /* normal case, use syscache */
        tuple = SearchSysCacheCopy(RELOID,
                                   ObjectIdGetDatum(relid),
                                   0, 0, 0);
    }

    if (!HeapTupleIsValid(tuple))
        elog(ERROR, "could not find tuple for relation %u", relid);
    rd_rel = (Form_pg_class) GETSTRUCT(tuple);

    /* Apply required updates, if any, to copied tuple */

    dirty = false;
    if (rd_rel->relhasindex != hasindex)
    {
        rd_rel->relhasindex = hasindex;
        dirty = true;
    }
    if (isprimary)
    {
        if (!rd_rel->relhaspkey)
        {
            rd_rel->relhaspkey = true;
            dirty = true;
        }
    }
    if (OidIsValid(reltoastidxid))
    {
        Assert(rd_rel->relkind == RELKIND_TOASTVALUE);
        if (rd_rel->reltoastidxid != reltoastidxid)
        {
            rd_rel->reltoastidxid = reltoastidxid;
            dirty = true;
        }
    }
    if (rd_rel->reltuples != (float4) reltuples)
    {
        rd_rel->reltuples = (float4) reltuples;
        dirty = true;
    }
    if (rd_rel->relpages != (int32) relpages)
    {
        rd_rel->relpages = (int32) relpages;
        dirty = true;
    }

    /*
     * If anything changed, write out the tuple
     */
    if (dirty)
    {
        heap_inplace_update(pg_class, tuple);
        /* the above sends a cache inval message */
    }
    else
    {
        /* no need to change tuple, but force relcache inval anyway */
        CacheInvalidateRelcacheByTuple(tuple);
    }

    heap_freetuple(tuple);

    heap_close(pg_class, RowExclusiveLock);
}
/*
 * setNewRelfilenode		- assign a new relfilenode value to the relation
 *
 * Caller must already hold exclusive lock on the relation.
 *
 * The relation is marked with relfrozenxid=freezeXid (InvalidTransactionId
 * must be passed for indexes)
 */
void
setNewRelfilenode(Relation relation, TransactionId freezeXid)
{
    Oid         newrelfilenode;
    RelFileNode newrnode;
    SMgrRelation srel;
    Relation    pg_class;
    HeapTuple   tuple;
    Form_pg_class rd_rel;
    ForkNumber  i;

    /* Can't change relfilenode for nailed tables (indexes ok though) */
    Assert(!relation->rd_isnailed ||
           relation->rd_rel->relkind == RELKIND_INDEX);
    /* Can't change for shared tables or indexes */
    Assert(!relation->rd_rel->relisshared);
    /* Indexes must have Invalid frozenxid; other relations must not */
    Assert((relation->rd_rel->relkind == RELKIND_INDEX &&
            freezeXid == InvalidTransactionId) ||
           TransactionIdIsNormal(freezeXid));

    /* Allocate a new relfilenode */
    newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace,
                                       relation->rd_rel->relisshared,
                                       NULL);

    /*
     * Find the pg_class tuple for the given relation.  This is not used
     * during bootstrap, so okay to use heap_update always.
     */
    pg_class = heap_open(RelationRelationId, RowExclusiveLock);

    tuple = SearchSysCacheCopy(RELOID,
                               ObjectIdGetDatum(RelationGetRelid(relation)),
                               0, 0, 0);
    if (!HeapTupleIsValid(tuple))
        elog(ERROR, "could not find tuple for relation %u",
             RelationGetRelid(relation));
    rd_rel = (Form_pg_class) GETSTRUCT(tuple);

    RelationOpenSmgr(relation);

    /*
     * ... and create storage for corresponding forks in the new relfilenode.
     *
     * NOTE: any conflict in relfilenode value will be caught here
     */
    newrnode = relation->rd_node;
    newrnode.relNode = newrelfilenode;
    srel = smgropen(newrnode);

    /* Create the main fork, like heap_create() does */
    smgrcreate(srel, MAIN_FORKNUM, relation->rd_istemp, false);

    /*
     * For a heap, create FSM fork as well. Indexams are responsible for
     * creating any extra forks themselves.
     */
    if (relation->rd_rel->relkind == RELKIND_RELATION ||
        relation->rd_rel->relkind == RELKIND_TOASTVALUE)
        smgrcreate(srel, FSM_FORKNUM, relation->rd_istemp, false);

    /* schedule unlinking old files */
    for (i = 0; i <= MAX_FORKNUM; i++)
    {
        if (smgrexists(relation->rd_smgr, i))
            smgrscheduleunlink(relation->rd_smgr, i, relation->rd_istemp);
    }

    RelationCloseSmgr(relation);

    /* update the pg_class row */
    rd_rel->relfilenode = newrelfilenode;
    rd_rel->relpages = 0;       /* it's empty until further notice */
    rd_rel->reltuples = 0;
    rd_rel->relfrozenxid = freezeXid;
    simple_heap_update(pg_class, &tuple->t_self, tuple);
    CatalogUpdateIndexes(pg_class, tuple);

    heap_freetuple(tuple);

    heap_close(pg_class, RowExclusiveLock);

    /* Make sure the relfilenode change is visible */
    CommandCounterIncrement();

    /* Mark the rel as having a new relfilenode in current transaction */
    RelationCacheMarkNewRelfilenode(relation);
}
/*
 * index_build - invoke access-method-specific index build procedure
 *
 * On entry, the index's catalog entries are valid, and its physical disk
 * file has been created but is empty.  We call the AM-specific build
 * procedure to fill in the index contents.  We then update the pg_class
 * entries of the index and heap relation as needed, using statistics
 * returned by ambuild as well as data passed by the caller.
 *
 * Note: when reindexing an existing index, isprimary can be false;
 * the index is already properly marked and need not be re-marked.
 *
 * Note: before Postgres 8.2, the passed-in heap and index Relations
 * were automatically closed by this routine.  This is no longer the case.
 * The caller opened 'em, and the caller should close 'em.
 */
void
index_build(Relation heapRelation,
            Relation indexRelation,
            IndexInfo *indexInfo,
            bool isprimary)
{
    RegProcedure procedure;
    IndexBuildResult *stats;
    Oid         save_userid;
    bool        save_secdefcxt;

    Assert(RelationIsValid(indexRelation));
    Assert(PointerIsValid(indexRelation->rd_am));

    procedure = indexRelation->rd_am->ambuild;
    Assert(RegProcedureIsValid(procedure));

    /*
     * Switch to the table owner's userid, so that any index functions are
     * run as that user.
     */
    GetUserIdAndContext(&save_userid, &save_secdefcxt);
    SetUserIdAndContext(heapRelation->rd_rel->relowner, true);

    /*
     * Call the access method's build procedure
     */
    stats = (IndexBuildResult *)
        DatumGetPointer(OidFunctionCall3(procedure,
                                         PointerGetDatum(heapRelation),
                                         PointerGetDatum(indexRelation),
                                         PointerGetDatum(indexInfo)));
    Assert(PointerIsValid(stats));

    /* Restore userid */
    SetUserIdAndContext(save_userid, save_secdefcxt);

    /*
     * If we found any potentially broken HOT chains, mark the index as not
     * being usable until the current transaction is below the event horizon.
     * See src/backend/access/heap/README.HOT for discussion.
     */
    if (indexInfo->ii_BrokenHotChain)
    {
        Oid         indexId = RelationGetRelid(indexRelation);
        Relation    pg_index;
        HeapTuple   indexTuple;
        Form_pg_index indexForm;

        pg_index = heap_open(IndexRelationId, RowExclusiveLock);

        indexTuple = SearchSysCacheCopy(INDEXRELID,
                                        ObjectIdGetDatum(indexId),
                                        0, 0, 0);
        if (!HeapTupleIsValid(indexTuple))
            elog(ERROR, "cache lookup failed for index %u", indexId);
        indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

        indexForm->indcheckxmin = true;
        simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
        CatalogUpdateIndexes(pg_index, indexTuple);

        heap_freetuple(indexTuple);
        heap_close(pg_index, RowExclusiveLock);
    }

    /*
     * Update heap and index pg_class rows
     */
    index_update_stats(heapRelation,
                       true,
                       isprimary,
                       (heapRelation->rd_rel->relkind == RELKIND_TOASTVALUE) ?
                       RelationGetRelid(indexRelation) : InvalidOid,
                       stats->heap_tuples);

    index_update_stats(indexRelation,
                       false,
                       false,
                       InvalidOid,
                       stats->index_tuples);

    /* Make the updated versions visible */
    CommandCounterIncrement();
}
/*
 * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
 *
 * This is called back from an access-method-specific index build procedure
 * after the AM has done whatever setup it needs.  The parent heap relation
 * is scanned to find tuples that should be entered into the index.  Each
 * such tuple is passed to the AM's callback routine, which does the right
 * things to add it to the new index.  After we return, the AM's index
 * build procedure does whatever cleanup is needed; in particular, it should
 * close the heap and index relations.
 *
 * The total count of heap tuples is returned.  This is for updating pg_class
 * statistics.  (It's annoying not to be able to do that here, but we can't
 * do it until after the relation is closed.)  Note that the index AM itself
 * must keep track of the number of index tuples; we don't do so here because
 * the AM might reject some of the tuples for its own reasons, such as being
 * unable to store NULLs.
 *
 * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
 * any potentially broken HOT chains.  Currently, we set this if there are
 * any RECENTLY_DEAD entries in a HOT chain, without trying very hard to
 * detect whether they're really incompatible with the chain tip.
 */
double
IndexBuildHeapScan(Relation heapRelation,
                   Relation indexRelation,
                   IndexInfo *indexInfo,
                   IndexBuildCallback callback,
                   void *callback_state)
{
    HeapScanDesc scan;
    HeapTuple   heapTuple;
    Datum       values[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
    double      reltuples;
    List       *predicate;
    TupleTableSlot *slot;
    EState     *estate;
    ExprContext *econtext;
    Snapshot    snapshot;
    TransactionId OldestXmin;
    BlockNumber root_blkno = InvalidBlockNumber;
    OffsetNumber root_offsets[MaxHeapTuplesPerPage];

    Assert(OidIsValid(indexRelation->rd_rel->relam));

    /*
     * Need an EState for evaluation of index expressions and partial-index
     * predicates.  Also a slot to hold the current tuple.
     */
    estate = CreateExecutorState();
    econtext = GetPerTupleExprContext(estate);
    slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

    /* Arrange for econtext's scan tuple to be the tuple under test */
    econtext->ecxt_scantuple = slot;

    /* Set up execution state for predicate, if any. */
    predicate = (List *)
        ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
                        estate);

    /*
     * Prepare for scan of the base relation.  In a normal index build, we use
     * SnapshotAny because we must retrieve all tuples and do our own time
     * qual checks (because we have to index RECENTLY_DEAD tuples). In a
     * concurrent build, we take a regular MVCC snapshot and index whatever's
     * live according to that.  During bootstrap we just use SnapshotNow.
     */
    if (IsBootstrapProcessingMode())
    {
        snapshot = SnapshotNow;
        OldestXmin = InvalidTransactionId;      /* not used */
    }
    else if (indexInfo->ii_Concurrent)
    {
        snapshot = RegisterSnapshot(GetTransactionSnapshot());
        OldestXmin = InvalidTransactionId;      /* not used */
    }
    else
    {
        snapshot = SnapshotAny;
        /* okay to ignore lazy VACUUMs here */
        OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
    }

    scan = heap_beginscan(heapRelation, snapshot, 0, NULL);

    reltuples = 0;

    /*
     * Scan all tuples in the base relation.
     */
    while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        bool        tupleIsAlive;

        CHECK_FOR_INTERRUPTS();

        /*
         * When dealing with a HOT-chain of updated tuples, we want to index
         * the values of the live tuple (if any), but index it under the TID
         * of the chain's root tuple.  This approach is necessary to preserve
         * the HOT-chain structure in the heap. So we need to be able to find
         * the root item offset for every tuple that's in a HOT-chain.  When
         * first reaching a new page of the relation, call
         * heap_get_root_tuples() to build a map of root item offsets on the
         * page.
         *
         * It might look unsafe to use this information across buffer
         * lock/unlock.  However, we hold ShareLock on the table so no
         * ordinary insert/update/delete should occur; and we hold pin on the
         * buffer continuously while visiting the page, so no pruning
         * operation can occur either.
         *
         * Note the implied assumption that there is no more than one live
         * tuple per HOT-chain ...
         */
        if (scan->rs_cblock != root_blkno)
        {
            Page        page = BufferGetPage(scan->rs_cbuf);

            LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
            heap_get_root_tuples(page, root_offsets);
            LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

            root_blkno = scan->rs_cblock;
        }

        if (snapshot == SnapshotAny)
        {
            /* do our own time qual check */
            bool        indexIt;

    recheck:

            /*
             * We could possibly get away with not locking the buffer here,
             * since caller should hold ShareLock on the relation, but let's
             * be conservative about it.  (This remark is still correct even
             * with HOT-pruning: our pin on the buffer prevents pruning.)
             */
            LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);

            switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin,
                                             scan->rs_cbuf))
            {
                case HEAPTUPLE_DEAD:
                    /* Definitely dead, we can ignore it */
                    indexIt = false;
                    tupleIsAlive = false;
                    break;
                case HEAPTUPLE_LIVE:
                    /* Normal case, index and unique-check it */
                    indexIt = true;
                    tupleIsAlive = true;
                    break;
                case HEAPTUPLE_RECENTLY_DEAD:

                    /*
                     * If tuple is recently deleted then we must index it
                     * anyway to preserve MVCC semantics.  (Pre-existing
                     * transactions could try to use the index after we finish
                     * building it, and may need to see such tuples.)
                     *
                     * However, if it was HOT-updated then we must only index
                     * the live tuple at the end of the HOT-chain.  Since this
                     * breaks semantics for pre-existing snapshots, mark the
                     * index as unusable for them.
                     *
                     * If we've already decided that the index will be unsafe
                     * for old snapshots, we may as well stop indexing
                     * recently-dead tuples, since there's no longer any
                     * point.
                     */
                    if (HeapTupleIsHotUpdated(heapTuple))
                    {
                        indexIt = false;
                        /* mark the index as unsafe for old snapshots */
                        indexInfo->ii_BrokenHotChain = true;
                    }
                    else if (indexInfo->ii_BrokenHotChain)
                        indexIt = false;
                    else
                        indexIt = true;
                    /* In any case, exclude the tuple from unique-checking */
                    tupleIsAlive = false;
                    break;
                case HEAPTUPLE_INSERT_IN_PROGRESS:

                    /*
                     * Since caller should hold ShareLock or better, we should
                     * not see any tuples inserted by open transactions ---
                     * unless it's our own transaction.  (Consider INSERT
                     * followed by CREATE INDEX within a transaction.)  An
                     * exception occurs when reindexing a system catalog,
                     * because we often release lock on system catalogs before
                     * committing.  In that case we wait for the inserting
                     * transaction to finish and check again.  (We could do
                     * that on user tables too, but since the case is not
                     * expected it seems better to throw an error.)
                     */
                    if (!TransactionIdIsCurrentTransactionId(
                                  HeapTupleHeaderGetXmin(heapTuple->t_data)))
                    {
                        if (!IsSystemRelation(heapRelation))
                            elog(ERROR, "concurrent insert in progress");
                        else
                        {
                            /*
                             * Must drop the lock on the buffer before we wait
                             */
                            TransactionId xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);

                            LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
                            XactLockTableWait(xwait);
                            goto recheck;
                        }
                    }

                    /*
                     * We must index such tuples, since if the index build
                     * commits then they're good.
                     */
                    indexIt = true;
                    tupleIsAlive = true;
                    break;
                case HEAPTUPLE_DELETE_IN_PROGRESS:

                    /*
                     * Since caller should hold ShareLock or better, we should
                     * not see any tuples deleted by open transactions ---
                     * unless it's our own transaction.  (Consider DELETE
                     * followed by CREATE INDEX within a transaction.)  An
                     * exception occurs when reindexing a system catalog,
                     * because we often release lock on system catalogs before
                     * committing.  In that case we wait for the deleting
                     * transaction to finish and check again.  (We could do
                     * that on user tables too, but since the case is not
                     * expected it seems better to throw an error.)
                     */
                    Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
                    if (!TransactionIdIsCurrentTransactionId(
                                  HeapTupleHeaderGetXmax(heapTuple->t_data)))
                    {
                        if (!IsSystemRelation(heapRelation))
                            elog(ERROR, "concurrent delete in progress");
                        else
                        {
                            /*
                             * Must drop the lock on the buffer before we wait
                             */
                            TransactionId xwait = HeapTupleHeaderGetXmax(heapTuple->t_data);

                            LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
                            XactLockTableWait(xwait);
                            goto recheck;
                        }
                    }

                    /*
                     * Otherwise, we have to treat these tuples just like
                     * RECENTLY_DELETED ones.
                     */
                    if (HeapTupleIsHotUpdated(heapTuple))
                    {
                        indexIt = false;
                        /* mark the index as unsafe for old snapshots */
                        indexInfo->ii_BrokenHotChain = true;
                    }
                    else if (indexInfo->ii_BrokenHotChain)
                        indexIt = false;
                    else
                        indexIt = true;
                    /* In any case, exclude the tuple from unique-checking */
                    tupleIsAlive = false;
                    break;
                default:
                    elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                    indexIt = tupleIsAlive = false;     /* keep compiler quiet */
                    break;
            }

            LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

            if (!indexIt)
                continue;
        }
        else
        {
            /* heap_getnext did the time qual check */
            tupleIsAlive = true;
        }

        reltuples += 1;

        MemoryContextReset(econtext->ecxt_per_tuple_memory);

        /* Set up for predicate or expression evaluation */
        ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

        /*
         * In a partial index, discard tuples that don't satisfy the
         * predicate.
         */
        if (predicate != NIL)
        {
            if (!ExecQual(predicate, econtext, false))
                continue;
        }

        /*
         * For the current heap tuple, extract all the attributes we use in
         * this index, and note which are null.  This also performs evaluation
         * of any expressions needed.
         */
        FormIndexDatum(indexInfo,
                       slot,
                       estate,
                       values,
                       isnull);

        /*
         * You'd think we should go ahead and build the index tuple here, but
         * some index AMs want to do further processing on the data first. So
         * pass the values[] and isnull[] arrays, instead.
         */

        if (HeapTupleIsHeapOnly(heapTuple))
        {
            /*
             * For a heap-only tuple, pretend its TID is that of the root. See
             * src/backend/access/heap/README.HOT for discussion.
             */
            HeapTupleData rootTuple;
            OffsetNumber offnum;

            rootTuple = *heapTuple;
            offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);

            Assert(OffsetNumberIsValid(root_offsets[offnum - 1]));

            ItemPointerSetOffsetNumber(&rootTuple.t_self,
                                       root_offsets[offnum - 1]);

            /* Call the AM's callback routine to process the tuple */
            callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
                     callback_state);
        }
        else
        {
            /* Call the AM's callback routine to process the tuple */
            callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
                     callback_state);
        }
    }

    heap_endscan(scan);

    /* we can now forget our snapshot, if set */
    if (indexInfo->ii_Concurrent)
        UnregisterSnapshot(snapshot);

    ExecDropSingleTupleTableSlot(slot);

    FreeExecutorState(estate);

    /* These may have been pointing to the now-gone estate */
    indexInfo->ii_ExpressionsState = NIL;
    indexInfo->ii_PredicateState = NIL;

    return reltuples;
}
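
/*
 * Illustrative sketch (not part of the original file): the general shape of
 * an access method's ambuild routine driving IndexBuildHeapScan() with a
 * per-tuple callback.  The names example_build_callback/ExampleBuildState
 * are assumptions for illustration; a real AM (e.g. nbtree's btbuild) does
 * considerably more per tuple and exposes the build function through the
 * fmgr interface, as the OidFunctionCall3() invocation in index_build()
 * above shows.
 */
#ifdef NOT_USED
typedef struct ExampleBuildState
{
    double      indtuples;      /* # tuples accepted into the index */
} ExampleBuildState;

static void
example_build_callback(Relation index, HeapTuple htup,
                       Datum *values, bool *isnull,
                       bool tupleIsAlive, void *state)
{
    ExampleBuildState *buildstate = (ExampleBuildState *) state;
    IndexTuple  itup;

    /* form an index tuple from the extracted column values */
    itup = index_form_tuple(RelationGetDescr(index), values, isnull);
    itup->t_tid = htup->t_self;

    /* a real AM would insert itup into its own structures here */
    buildstate->indtuples += 1;

    pfree(itup);
}

static IndexBuildResult *
example_ambuild(Relation heap, Relation index, IndexInfo *indexInfo)
{
    IndexBuildResult *result;
    ExampleBuildState buildstate;
    double      reltuples;

    buildstate.indtuples = 0;

    /* scan the heap, extracting index columns and calling us back */
    reltuples = IndexBuildHeapScan(heap, index, indexInfo,
                                   example_build_callback,
                                   (void *) &buildstate);

    result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
    result->heap_tuples = reltuples;
    result->index_tuples = buildstate.indtuples;
    return result;
}
#endif   /* NOT_USED */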
/*
 * validate_index - support code for concurrent index builds
 *
 * We do a concurrent index build by first inserting the catalog entry for the
 * index via index_create(), marking it not indisready and not indisvalid.
 * Then we commit our transaction and start a new one, then we wait for all
 * transactions that could have been modifying the table to terminate.  Now
 * we know that any subsequently-started transactions will see the index and
 * honor its constraints on HOT updates; so while existing HOT-chains might
 * be broken with respect to the index, no currently live tuple will have an
 * incompatible HOT update done to it.  We now build the index normally via
 * index_build(), while holding a weak lock that allows concurrent
 * insert/update/delete.  Also, we index only tuples that are valid
 * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
 * build takes care to include recently-dead tuples.  This is OK because
 * we won't mark the index valid until all transactions that might be able
 * to see those tuples are gone.  The reason for doing that is to avoid
 * bogus unique-index failures due to concurrent UPDATEs (we might see
 * different versions of the same row as being valid when we pass over them,
 * if we used HeapTupleSatisfiesVacuum).  This leaves us with an index that
 * does not contain any tuples added to the table while we built the index.
 *
 * Next, we mark the index "indisready" (but still not "indisvalid") and
 * commit the second transaction and start a third.  Again we wait for all
 * transactions that could have been modifying the table to terminate.  Now
 * we know that any subsequently-started transactions will see the index and
 * insert their new tuples into it.  We then take a new reference snapshot
 * which is passed to validate_index().  Any tuples that are valid according
 * to this snap, but are not in the index, must be added to the index.
 * (Any tuples committed live after the snap will be inserted into the
 * index by their originating transaction.  Any tuples committed dead before
 * the snap need not be indexed, because we will wait out all transactions
 * that might care about them before we mark the index valid.)
 *
 * validate_index() works by first gathering all the TIDs currently in the
 * index, using a bulkdelete callback that just stores the TIDs and doesn't
 * ever say "delete it".  (This should be faster than a plain indexscan;
 * also, not all index AMs support full-index indexscan.)  Then we sort the
 * TIDs, and finally scan the table doing a "merge join" against the TID list
 * to see which tuples are missing from the index.  Thus we will ensure that
 * all tuples valid according to the reference snapshot are in the index.
 *
 * Building a unique index this way is tricky: we might try to insert a
 * tuple that is already dead or is in process of being deleted, and we
 * mustn't have a uniqueness failure against an updated version of the same
 * row.  We could try to check the tuple to see if it's already dead and tell
 * index_insert() not to do the uniqueness check, but that still leaves us
 * with a race condition against an in-progress update.  To handle that,
 * we expect the index AM to recheck liveness of the to-be-inserted tuple
 * before it declares a uniqueness error.
 *
 * After completing validate_index(), we wait until all transactions that
 * were alive at the time of the reference snapshot are gone; this is
 * necessary to be sure there are none left with a serializable snapshot
 * older than the reference (and hence possibly able to see tuples we did
 * not index).  Then we mark the index "indisvalid" and commit.  Subsequent
 * transactions will be able to use it for queries.
 *
 * Doing two full table scans is a brute-force strategy.  We could try to be
 * cleverer, eg storing new tuples in a special area of the table (perhaps
 * making the table append-only by setting use_fsm).  However that would
 * add yet more locking issues.
 */
void
validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
{
	Relation	heapRelation,
				indexRelation;
	IndexInfo  *indexInfo;
	IndexVacuumInfo ivinfo;
	v_i_state	state;
	Oid			save_userid;
	bool		save_secdefcxt;

	/* Open and lock the parent heap relation */
	heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
	/* And the target index relation */
	indexRelation = index_open(indexId, RowExclusiveLock);

	/*
	 * Fetch info needed for index_insert.  (You might think this should be
	 * passed in from DefineIndex, but its copy is long gone due to having
	 * been built in a previous transaction.)
	 */
	indexInfo = BuildIndexInfo(indexRelation);

	/* mark build is concurrent just for consistency */
	indexInfo->ii_Concurrent = true;

	/*
	 * Switch to the table owner's userid, so that any index functions are
	 * run as that user.
	 */
	GetUserIdAndContext(&save_userid, &save_secdefcxt);
	SetUserIdAndContext(heapRelation->rd_rel->relowner, true);

	/*
	 * Scan the index and gather up all the TIDs into a tuplesort object.
	 */
	ivinfo.index = indexRelation;
	ivinfo.vacuum_full = false;
	ivinfo.message_level = DEBUG2;
	ivinfo.num_heap_tuples = -1;
	ivinfo.strategy = NULL;

	state.tuplesort = tuplesort_begin_datum(TIDOID,
											TIDLessOperator, false,
											maintenance_work_mem,
											false);
	state.htups = state.itups = state.tups_inserted = 0;

	(void) index_bulk_delete(&ivinfo, NULL,
							 validate_index_callback, (void *) &state);

	/* Execute the sort */
	tuplesort_performsort(state.tuplesort);

	/*
	 * Now scan the heap and "merge" it with the index
	 */
	validate_index_heapscan(heapRelation,
							indexRelation,
							indexInfo,
							snapshot,
							&state);

	/* Done with tuplesort object */
	tuplesort_end(state.tuplesort);

	elog(DEBUG2,
		 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
		 state.htups, state.itups, state.tups_inserted);

	/* Restore userid */
	SetUserIdAndContext(save_userid, save_secdefcxt);

	/* Close rels, but keep locks */
	index_close(indexRelation, NoLock);
	heap_close(heapRelation, NoLock);
}
/*
 * validate_index_callback - bulkdelete callback to collect the index TIDs
 */
static bool
validate_index_callback(ItemPointer itemptr, void *opaque)
{
	v_i_state  *state = (v_i_state *) opaque;

	tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
	state->itups += 1;
	return false;				/* never actually delete anything */
}
/*
 * validate_index_heapscan - second table scan for concurrent index build
 *
 * This has much code in common with IndexBuildHeapScan, but it's enough
 * different that it seems cleaner to have two routines not one.
 */
static void
validate_index_heapscan(Relation heapRelation,
						Relation indexRelation,
						IndexInfo *indexInfo,
						Snapshot snapshot,
						v_i_state *state)
{
	HeapScanDesc scan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	List	   *predicate;
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	BlockNumber root_blkno = InvalidBlockNumber;
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];
	bool		in_index[MaxHeapTuplesPerPage];

	/* state variables for the merge */
	ItemPointer indexcursor = NULL;
	bool		tuplesort_empty = false;

	/*
	 * sanity checks
	 */
	Assert(OidIsValid(indexRelation->rd_rel->relam));

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = (List *)
		ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
						estate);

	/*
	 * Prepare for scan of the base relation.  We need just those tuples
	 * satisfying the passed-in reference snapshot.  We must disable syncscan
	 * here, because it's critical that we read from block zero forward to
	 * match the sorted TIDs.
	 */
	scan = heap_beginscan_strat(heapRelation,	/* relation */
								snapshot,		/* snapshot */
								0,				/* number of keys */
								NULL,			/* scan key */
								true,			/* buffer access strategy OK */
								false);			/* syncscan not OK */

	/*
	 * Scan all tuples matching the snapshot.
	 */
	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		ItemPointer heapcursor = &heapTuple->t_self;
		ItemPointerData rootTuple;
		OffsetNumber root_offnum;

		CHECK_FOR_INTERRUPTS();

		state->htups += 1;

		/*
		 * As commented in IndexBuildHeapScan, we should index heap-only
		 * tuples under the TIDs of their root tuples; so when we advance onto
		 * a new heap page, build a map of root item offsets on the page.
		 *
		 * This complicates merging against the tuplesort output: we will
		 * visit the live tuples in order by their offsets, but the root
		 * offsets that we need to compare against the index contents might be
		 * ordered differently.  So we might have to "look back" within the
		 * tuplesort output, but only within the current page.  We handle that
		 * by keeping a bool array in_index[] showing all the
		 * already-passed-over tuplesort output TIDs of the current page.  We
		 * clear that array here, when advancing onto a new heap page.
		 */
		if (scan->rs_cblock != root_blkno)
		{
			Page		page = BufferGetPage(scan->rs_cbuf);

			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
			heap_get_root_tuples(page, root_offsets);
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			memset(in_index, 0, sizeof(in_index));

			root_blkno = scan->rs_cblock;
		}

		/* Convert actual tuple TID to root TID */
		rootTuple = *heapcursor;
		root_offnum = ItemPointerGetOffsetNumber(heapcursor);

		if (HeapTupleIsHeapOnly(heapTuple))
		{
			root_offnum = root_offsets[root_offnum - 1];
			Assert(OffsetNumberIsValid(root_offnum));
			ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
		}

		/*
		 * "merge" by skipping through the index tuples until we find or pass
		 * the current root tuple.
		 */
		while (!tuplesort_empty &&
			   (!indexcursor ||
				ItemPointerCompare(indexcursor, &rootTuple) < 0))
		{
			Datum		ts_val;
			bool		ts_isnull;

			if (indexcursor)
			{
				/*
				 * Remember index items seen earlier on the current heap page
				 */
				if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
					in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
				pfree(indexcursor);
			}

			tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
												  &ts_val, &ts_isnull);
			Assert(tuplesort_empty || !ts_isnull);
			indexcursor = (ItemPointer) DatumGetPointer(ts_val);
		}

		/*
		 * If the tuplesort has overshot *and* we didn't see a match earlier,
		 * then this tuple is missing from the index, so insert it.
		 */
		if ((tuplesort_empty ||
			 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
			!in_index[root_offnum - 1])
		{
			MemoryContextReset(econtext->ecxt_per_tuple_memory);

			/* Set up for predicate or expression evaluation */
			ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

			/*
			 * In a partial index, discard tuples that don't satisfy the
			 * predicate.
			 */
			if (predicate != NIL)
			{
				if (!ExecQual(predicate, econtext, false))
					continue;
			}

			/*
			 * For the current heap tuple, extract all the attributes we use
			 * in this index, and note which are null.  This also performs
			 * evaluation of any expressions needed.
			 */
			FormIndexDatum(indexInfo,
						   slot,
						   estate,
						   values,
						   isnull);

			/*
			 * You'd think we should go ahead and build the index tuple here,
			 * but some index AMs want to do further processing on the data
			 * first. So pass the values[] and isnull[] arrays, instead.
			 */

			/*
			 * If the tuple is already committed dead, you might think we
			 * could suppress uniqueness checking, but this is no longer true
			 * in the presence of HOT, because the insert is actually a proxy
			 * for a uniqueness check on the whole HOT-chain.  That is, the
			 * tuple we have here could be dead because it was already
			 * HOT-updated, and if so the updating transaction will not have
			 * thought it should insert index entries.  The index AM will
			 * check the whole HOT-chain and correctly detect a conflict if
			 * there is one.
			 */
			index_insert(indexRelation,
						 values,
						 isnull,
						 &rootTuple,
						 heapRelation,
						 indexInfo->ii_Unique);

			state->tups_inserted += 1;
		}
	}

	heap_endscan(scan);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NIL;
}
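/*
 * A worked example of the in_index[] look-back above (illustrative only):
 * suppose block 7 has root line pointers 1 and 2 whose snapshot-visible HOT
 * members sit at offsets 5 and 3.  The heap scan reaches offset 3 (root 2)
 * first, so the merge loop passes over index TID (7,1) and records it in
 * in_index[0]; when offset 5 (root 1) is reached later, the index cursor
 * has already overshot, but in_index[0] tells us the entry exists and no
 * duplicate insert is attempted.
 */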
/*
 * IndexGetRelation: given an index's relation OID, get the OID of the
 * relation it is an index on.  Uses the system cache.
 */
static Oid
IndexGetRelation(Oid indexId)
{
	HeapTuple	tuple;
	Form_pg_index index;
	Oid			result;

	tuple = SearchSysCache(INDEXRELID,
						   ObjectIdGetDatum(indexId),
						   0, 0, 0);
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for index %u", indexId);
	index = (Form_pg_index) GETSTRUCT(tuple);
	Assert(index->indexrelid == indexId);

	result = index->indrelid;
	ReleaseSysCache(tuple);
	return result;
}
/*
 * reindex_index - This routine is used to recreate a single index
 */
void
reindex_index(Oid indexId)
{
	Relation	iRel,
				heapRelation,
				pg_index;
	Oid			heapId;
	bool		inplace;
	HeapTuple	indexTuple;
	Form_pg_index indexForm;

	/*
	 * Open and lock the parent heap relation.  ShareLock is sufficient since
	 * we only need to be sure no schema or data changes are going on.
	 */
	heapId = IndexGetRelation(indexId);
	heapRelation = heap_open(heapId, ShareLock);

	/*
	 * Open the target index relation and get an exclusive lock on it, to
	 * ensure that no one else is touching this particular index.
	 */
	iRel = index_open(indexId, AccessExclusiveLock);

	/*
	 * Don't allow reindex on temp tables of other backends ... their local
	 * buffer manager is not going to cope.
	 */
	if (isOtherTempNamespace(RelationGetNamespace(iRel)))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot reindex temporary tables of other sessions")));

	/*
	 * Also check for active uses of the index in the current transaction;
	 * we don't want to reindex underneath an open indexscan.
	 */
	CheckTableNotInUse(iRel, "REINDEX INDEX");

	/*
	 * If it's a shared index, we must do inplace processing (because we have
	 * no way to update relfilenode in other databases).  Otherwise we can do
	 * it the normal transaction-safe way.
	 *
	 * Since inplace processing isn't crash-safe, we only allow it in a
	 * standalone backend.  (In the REINDEX TABLE and REINDEX DATABASE cases,
	 * the caller should have detected this.)
	 */
	inplace = iRel->rd_rel->relisshared;

	if (inplace && IsUnderPostmaster)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
						RelationGetRelationName(iRel))));

	PG_TRY();
	{
		IndexInfo  *indexInfo;

		/* Suppress use of the target index while rebuilding it */
		SetReindexProcessing(heapId, indexId);

		/* Fetch info needed for index_build */
		indexInfo = BuildIndexInfo(iRel);

		if (inplace)
		{
			/*
			 * Truncate the actual file (and discard buffers). The indexam
			 * is responsible for truncating the FSM, if applicable
			 */
			RelationTruncate(iRel, 0);
		}
		else
		{
			/*
			 * We'll build a new physical relation for the index.
			 */
			setNewRelfilenode(iRel, InvalidTransactionId);
		}

		/* Initialize the index and rebuild */
		/* Note: we do not need to re-establish pkey setting */
		index_build(heapRelation, iRel, indexInfo, false);
	}
	PG_CATCH();
	{
		/* Make sure flag gets cleared on error exit */
		ResetReindexProcessing();
		PG_RE_THROW();
	}
	PG_END_TRY();
	ResetReindexProcessing();

	/*
	 * If the index is marked invalid or not ready (ie, it's from a failed
	 * CREATE INDEX CONCURRENTLY), we can now mark it valid.  This allows
	 * REINDEX to be used to clean up in such cases.
	 */
	pg_index = heap_open(IndexRelationId, RowExclusiveLock);

	indexTuple = SearchSysCacheCopy(INDEXRELID,
									ObjectIdGetDatum(indexId),
									0, 0, 0);
	if (!HeapTupleIsValid(indexTuple))
		elog(ERROR, "cache lookup failed for index %u", indexId);
	indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

	if (!indexForm->indisvalid || !indexForm->indisready)
	{
		indexForm->indisvalid = true;
		indexForm->indisready = true;
		simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
		CatalogUpdateIndexes(pg_index, indexTuple);
	}
	heap_close(pg_index, RowExclusiveLock);

	/* Close rels, but keep locks */
	index_close(iRel, NoLock);
	heap_close(heapRelation, NoLock);
}
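/*
 * Entry-point note (an assumption about callers, not something this file
 * enforces): reindex_index() is typically reached from the REINDEX INDEX
 * command, while reindex_relation() below serves REINDEX TABLE/DATABASE
 * and other whole-relation rebuild paths.
 */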
/*
 * reindex_relation - This routine is used to recreate all indexes
 * of a relation (and optionally its toast relation too, if any).
 *
 * Returns true if any indexes were rebuilt.  Note that a
 * CommandCounterIncrement will occur after each index rebuild.
 */
bool
reindex_relation(Oid relid, bool toast_too)
{
	Relation	rel;
	Oid			toast_relid;
	bool		is_pg_class;
	bool		result;
	List	   *indexIds,
			   *doneIndexes;
	ListCell   *indexId;

	/*
	 * Open and lock the relation.  ShareLock is sufficient since we only need
	 * to prevent schema and data changes in it.
	 */
	rel = heap_open(relid, ShareLock);

	toast_relid = rel->rd_rel->reltoastrelid;

	/*
	 * Get the list of index OIDs for this relation.  (We trust to the
	 * relcache to get this with a sequential scan if ignoring system
	 * indexes.)
	 */
	indexIds = RelationGetIndexList(rel);

	/*
	 * reindex_index will attempt to update the pg_class rows for the relation
	 * and index.  If we are processing pg_class itself, we want to make sure
	 * that the updates do not try to insert index entries into indexes we
	 * have not processed yet.  (When we are trying to recover from corrupted
	 * indexes, that could easily cause a crash.)  We can accomplish this
	 * because CatalogUpdateIndexes will use the relcache's index list to know
	 * which indexes to update.  We just force the index list to be only the
	 * stuff we've processed.
	 *
	 * It is okay to not insert entries into the indexes we have not processed
	 * yet because all of this is transaction-safe.  If we fail partway
	 * through, the updated rows are dead and it doesn't matter whether they
	 * have index entries.  Also, a new pg_class index will be created with an
	 * entry for its own pg_class row because we do setNewRelfilenode() before
	 * we do index_build().
	 *
	 * Note that we also clear pg_class's rd_oidindex until the loop is done,
	 * so that that index can't be accessed either.  This means we cannot
	 * safely generate new relation OIDs while in the loop; shouldn't be a
	 * problem.
	 */
	is_pg_class = (RelationGetRelid(rel) == RelationRelationId);

	/* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
	if (is_pg_class)
		(void) RelationGetIndexAttrBitmap(rel);

	/* Reindex all the indexes. */
	doneIndexes = NIL;
	foreach(indexId, indexIds)
	{
		Oid			indexOid = lfirst_oid(indexId);

		if (is_pg_class)
			RelationSetIndexList(rel, doneIndexes, InvalidOid);

		reindex_index(indexOid);

		CommandCounterIncrement();

		if (is_pg_class)
			doneIndexes = lappend_oid(doneIndexes, indexOid);
	}

	if (is_pg_class)
		RelationSetIndexList(rel, indexIds, ClassOidIndexId);

	/*
	 * Close rel, but continue to hold the lock.
	 */
	heap_close(rel, NoLock);

	result = (indexIds != NIL);

	/*
	 * If the relation has a secondary toast rel, reindex that too while we
	 * still hold the lock on the master table.
	 */
	if (toast_too && OidIsValid(toast_relid))
		result |= reindex_relation(toast_relid, false);

	return result;
}