Harmonize parameter names in ecpg code.
[pgsql.git] / src / backend / partitioning / partdesc.c
blob737f0edd89b985a0140a501a4059cfcac2ebbc55
/*-------------------------------------------------------------------------
 *
 * partdesc.c
 *		Support routines for manipulating partition descriptors
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *		  src/backend/partitioning/partdesc.c
 *
 *-------------------------------------------------------------------------
 */
15 #include "postgres.h"
17 #include "access/genam.h"
18 #include "access/htup_details.h"
19 #include "access/table.h"
20 #include "catalog/partition.h"
21 #include "catalog/pg_inherits.h"
22 #include "partitioning/partbounds.h"
23 #include "partitioning/partdesc.h"
24 #include "storage/bufmgr.h"
25 #include "storage/sinval.h"
26 #include "utils/builtins.h"
27 #include "utils/fmgroids.h"
28 #include "utils/hsearch.h"
29 #include "utils/inval.h"
30 #include "utils/lsyscache.h"
31 #include "utils/memutils.h"
32 #include "utils/partcache.h"
33 #include "utils/rel.h"
34 #include "utils/syscache.h"
36 typedef struct PartitionDirectoryData
38 MemoryContext pdir_mcxt;
39 HTAB *pdir_hash;
40 bool omit_detached;
41 } PartitionDirectoryData;
43 typedef struct PartitionDirectoryEntry
45 Oid reloid;
46 Relation rel;
47 PartitionDesc pd;
48 } PartitionDirectoryEntry;
50 static PartitionDesc RelationBuildPartitionDesc(Relation rel,
51 bool omit_detached);
55 * RelationGetPartitionDesc -- get partition descriptor, if relation is partitioned
57 * We keep two partdescs in relcache: rd_partdesc includes all partitions
58 * (even those being concurrently marked detached), while rd_partdesc_nodetach
59 * omits (some of) those. We store the pg_inherits.xmin value for the latter,
60 * to determine whether it can be validly reused in each case, since that
61 * depends on the active snapshot.
63 * Note: we arrange for partition descriptors to not get freed until the
64 * relcache entry's refcount goes to zero (see hacks in RelationClose,
65 * RelationClearRelation, and RelationBuildPartitionDesc). Therefore, even
66 * though we hand back a direct pointer into the relcache entry, it's safe
67 * for callers to continue to use that pointer as long as (a) they hold the
68 * relation open, and (b) they hold a relation lock strong enough to ensure
69 * that the data doesn't become stale.
71 PartitionDesc
72 RelationGetPartitionDesc(Relation rel, bool omit_detached)
74 Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
77 * If relcache has a partition descriptor, use that. However, we can only
78 * do so when we are asked to include all partitions including detached;
79 * and also when we know that there are no detached partitions.
81 * If there is no active snapshot, detached partitions aren't omitted
82 * either, so we can use the cached descriptor too in that case.
84 if (likely(rel->rd_partdesc &&
85 (!rel->rd_partdesc->detached_exist || !omit_detached ||
86 !ActiveSnapshotSet())))
87 return rel->rd_partdesc;
90 * If we're asked to omit detached partitions, we may be able to use a
91 * cached descriptor too. We determine that based on the pg_inherits.xmin
92 * that was saved alongside that descriptor: if the xmin that was not in
93 * progress for that active snapshot is also not in progress for the
94 * current active snapshot, then we can use it. Otherwise build one from
95 * scratch.
97 if (omit_detached &&
98 rel->rd_partdesc_nodetached &&
99 ActiveSnapshotSet())
101 Snapshot activesnap;
103 Assert(TransactionIdIsValid(rel->rd_partdesc_nodetached_xmin));
104 activesnap = GetActiveSnapshot();
106 if (!XidInMVCCSnapshot(rel->rd_partdesc_nodetached_xmin, activesnap))
107 return rel->rd_partdesc_nodetached;
110 return RelationBuildPartitionDesc(rel, omit_detached);
114 * RelationBuildPartitionDesc
115 * Form rel's partition descriptor, and store in relcache entry
117 * Partition descriptor is a complex structure; to avoid complicated logic to
118 * free individual elements whenever the relcache entry is flushed, we give it
119 * its own memory context, a child of CacheMemoryContext, which can easily be
120 * deleted on its own. To avoid leaking memory in that context in case of an
121 * error partway through this function, the context is initially created as a
122 * child of CurTransactionContext and only re-parented to CacheMemoryContext
123 * at the end, when no further errors are possible. Also, we don't make this
124 * context the current context except in very brief code sections, out of fear
125 * that some of our callees allocate memory on their own which would be leaked
126 * permanently.
128 * As a special case, partition descriptors that are requested to omit
129 * partitions being detached (and which contain such partitions) are transient
130 * and are not associated with the relcache entry. Such descriptors only last
131 * through the requesting Portal, so we use the corresponding memory context
132 * for them.
134 static PartitionDesc
135 RelationBuildPartitionDesc(Relation rel, bool omit_detached)
137 PartitionDesc partdesc;
138 PartitionBoundInfo boundinfo = NULL;
139 List *inhoids;
140 PartitionBoundSpec **boundspecs = NULL;
141 Oid *oids = NULL;
142 bool *is_leaf = NULL;
143 bool detached_exist;
144 bool is_omit;
145 TransactionId detached_xmin;
146 ListCell *cell;
147 int i,
148 nparts;
149 PartitionKey key = RelationGetPartitionKey(rel);
150 MemoryContext new_pdcxt;
151 MemoryContext oldcxt;
152 int *mapping;
155 * Get partition oids from pg_inherits. This uses a single snapshot to
156 * fetch the list of children, so while more children may be getting added
157 * concurrently, whatever this function returns will be accurate as of
158 * some well-defined point in time.
160 detached_exist = false;
161 detached_xmin = InvalidTransactionId;
162 inhoids = find_inheritance_children_extended(RelationGetRelid(rel),
163 omit_detached, NoLock,
164 &detached_exist,
165 &detached_xmin);
167 nparts = list_length(inhoids);
169 /* Allocate working arrays for OIDs, leaf flags, and boundspecs. */
170 if (nparts > 0)
172 oids = (Oid *) palloc(nparts * sizeof(Oid));
173 is_leaf = (bool *) palloc(nparts * sizeof(bool));
174 boundspecs = palloc(nparts * sizeof(PartitionBoundSpec *));
177 /* Collect bound spec nodes for each partition. */
178 i = 0;
179 foreach(cell, inhoids)
181 Oid inhrelid = lfirst_oid(cell);
182 HeapTuple tuple;
183 PartitionBoundSpec *boundspec = NULL;
185 /* Try fetching the tuple from the catcache, for speed. */
186 tuple = SearchSysCache1(RELOID, inhrelid);
187 if (HeapTupleIsValid(tuple))
189 Datum datum;
190 bool isnull;
192 datum = SysCacheGetAttr(RELOID, tuple,
193 Anum_pg_class_relpartbound,
194 &isnull);
195 if (!isnull)
196 boundspec = stringToNode(TextDatumGetCString(datum));
197 ReleaseSysCache(tuple);
201 * The system cache may be out of date; if so, we may find no pg_class
202 * tuple or an old one where relpartbound is NULL. In that case, try
203 * the table directly. We can't just AcceptInvalidationMessages() and
204 * retry the system cache lookup because it's possible that a
205 * concurrent ATTACH PARTITION operation has removed itself from the
206 * ProcArray but not yet added invalidation messages to the shared
207 * queue; InvalidateSystemCaches() would work, but seems excessive.
209 * Note that this algorithm assumes that PartitionBoundSpec we manage
210 * to fetch is the right one -- so this is only good enough for
211 * concurrent ATTACH PARTITION, not concurrent DETACH PARTITION or
212 * some hypothetical operation that changes the partition bounds.
214 if (boundspec == NULL)
216 Relation pg_class;
217 SysScanDesc scan;
218 ScanKeyData key[1];
219 Datum datum;
220 bool isnull;
222 pg_class = table_open(RelationRelationId, AccessShareLock);
223 ScanKeyInit(&key[0],
224 Anum_pg_class_oid,
225 BTEqualStrategyNumber, F_OIDEQ,
226 ObjectIdGetDatum(inhrelid));
227 scan = systable_beginscan(pg_class, ClassOidIndexId, true,
228 NULL, 1, key);
229 tuple = systable_getnext(scan);
230 datum = heap_getattr(tuple, Anum_pg_class_relpartbound,
231 RelationGetDescr(pg_class), &isnull);
232 if (!isnull)
233 boundspec = stringToNode(TextDatumGetCString(datum));
234 systable_endscan(scan);
235 table_close(pg_class, AccessShareLock);
238 /* Sanity checks. */
239 if (!boundspec)
240 elog(ERROR, "missing relpartbound for relation %u", inhrelid);
241 if (!IsA(boundspec, PartitionBoundSpec))
242 elog(ERROR, "invalid relpartbound for relation %u", inhrelid);
245 * If the PartitionBoundSpec says this is the default partition, its
246 * OID should match pg_partitioned_table.partdefid; if not, the
247 * catalog is corrupt.
249 if (boundspec->is_default)
251 Oid partdefid;
253 partdefid = get_default_partition_oid(RelationGetRelid(rel));
254 if (partdefid != inhrelid)
255 elog(ERROR, "expected partdefid %u, but got %u",
256 inhrelid, partdefid);
259 /* Save results. */
260 oids[i] = inhrelid;
261 is_leaf[i] = (get_rel_relkind(inhrelid) != RELKIND_PARTITIONED_TABLE);
262 boundspecs[i] = boundspec;
263 ++i;
267 * Create PartitionBoundInfo and mapping, working in the caller's context.
268 * This could fail, but we haven't done any damage if so.
270 if (nparts > 0)
271 boundinfo = partition_bounds_create(boundspecs, nparts, key, &mapping);
274 * Now build the actual relcache partition descriptor, copying all the
275 * data into a new, small context. As per above comment, we don't make
276 * this a long-lived context until it's finished.
278 new_pdcxt = AllocSetContextCreate(CurTransactionContext,
279 "partition descriptor",
280 ALLOCSET_SMALL_SIZES);
281 MemoryContextCopyAndSetIdentifier(new_pdcxt,
282 RelationGetRelationName(rel));
284 partdesc = (PartitionDescData *)
285 MemoryContextAllocZero(new_pdcxt, sizeof(PartitionDescData));
286 partdesc->nparts = nparts;
287 partdesc->detached_exist = detached_exist;
288 /* If there are no partitions, the rest of the partdesc can stay zero */
289 if (nparts > 0)
291 oldcxt = MemoryContextSwitchTo(new_pdcxt);
292 partdesc->boundinfo = partition_bounds_copy(boundinfo, key);
294 /* Initialize caching fields for speeding up ExecFindPartition */
295 partdesc->last_found_datum_index = -1;
296 partdesc->last_found_part_index = -1;
297 partdesc->last_found_count = 0;
299 partdesc->oids = (Oid *) palloc(nparts * sizeof(Oid));
300 partdesc->is_leaf = (bool *) palloc(nparts * sizeof(bool));
303 * Assign OIDs from the original array into mapped indexes of the
304 * result array. The order of OIDs in the former is defined by the
305 * catalog scan that retrieved them, whereas that in the latter is
306 * defined by canonicalized representation of the partition bounds.
307 * Also save leaf-ness of each partition.
309 for (i = 0; i < nparts; i++)
311 int index = mapping[i];
313 partdesc->oids[index] = oids[i];
314 partdesc->is_leaf[index] = is_leaf[i];
316 MemoryContextSwitchTo(oldcxt);
320 * Are we working with the partdesc that omits the detached partition, or
321 * the one that includes it?
323 * Note that if a partition was found by the catalog's scan to have been
324 * detached, but the pg_inherit tuple saying so was not visible to the
325 * active snapshot (find_inheritance_children_extended will not have set
326 * detached_xmin in that case), we consider there to be no "omittable"
327 * detached partitions.
329 is_omit = omit_detached && detached_exist && ActiveSnapshotSet() &&
330 TransactionIdIsValid(detached_xmin);
333 * We have a fully valid partdesc. Reparent it so that it has the right
334 * lifespan.
336 MemoryContextSetParent(new_pdcxt, CacheMemoryContext);
339 * Store it into relcache.
341 * But first, a kluge: if there's an old context for this type of
342 * descriptor, it contains an old partition descriptor that may still be
343 * referenced somewhere. Preserve it, while not leaking it, by
344 * reattaching it as a child context of the new one. Eventually it will
345 * get dropped by either RelationClose or RelationClearRelation. (We keep
346 * the regular partdesc in rd_pdcxt, and the partdesc-excluding-
347 * detached-partitions in rd_pddcxt.)
349 if (is_omit)
351 if (rel->rd_pddcxt != NULL)
352 MemoryContextSetParent(rel->rd_pddcxt, new_pdcxt);
353 rel->rd_pddcxt = new_pdcxt;
354 rel->rd_partdesc_nodetached = partdesc;
357 * For partdescs built excluding detached partitions, which we save
358 * separately, we also record the pg_inherits.xmin of the detached
359 * partition that was omitted; this informs a future potential user of
360 * such a cached partdesc to only use it after cross-checking that the
361 * xmin is indeed visible to the snapshot it is going to be working
362 * with.
364 Assert(TransactionIdIsValid(detached_xmin));
365 rel->rd_partdesc_nodetached_xmin = detached_xmin;
367 else
369 if (rel->rd_pdcxt != NULL)
370 MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt);
371 rel->rd_pdcxt = new_pdcxt;
372 rel->rd_partdesc = partdesc;
375 return partdesc;
379 * CreatePartitionDirectory
380 * Create a new partition directory object.
382 PartitionDirectory
383 CreatePartitionDirectory(MemoryContext mcxt, bool omit_detached)
385 MemoryContext oldcontext = MemoryContextSwitchTo(mcxt);
386 PartitionDirectory pdir;
387 HASHCTL ctl;
389 pdir = palloc(sizeof(PartitionDirectoryData));
390 pdir->pdir_mcxt = mcxt;
392 ctl.keysize = sizeof(Oid);
393 ctl.entrysize = sizeof(PartitionDirectoryEntry);
394 ctl.hcxt = mcxt;
396 pdir->pdir_hash = hash_create("partition directory", 256, &ctl,
397 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
398 pdir->omit_detached = omit_detached;
400 MemoryContextSwitchTo(oldcontext);
401 return pdir;
405 * PartitionDirectoryLookup
406 * Look up the partition descriptor for a relation in the directory.
408 * The purpose of this function is to ensure that we get the same
409 * PartitionDesc for each relation every time we look it up. In the
410 * face of concurrent DDL, different PartitionDescs may be constructed with
411 * different views of the catalog state, but any single particular OID
412 * will always get the same PartitionDesc for as long as the same
413 * PartitionDirectory is used.
415 PartitionDesc
416 PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel)
418 PartitionDirectoryEntry *pde;
419 Oid relid = RelationGetRelid(rel);
420 bool found;
422 pde = hash_search(pdir->pdir_hash, &relid, HASH_ENTER, &found);
423 if (!found)
426 * We must keep a reference count on the relation so that the
427 * PartitionDesc to which we are pointing can't get destroyed.
429 RelationIncrementReferenceCount(rel);
430 pde->rel = rel;
431 pde->pd = RelationGetPartitionDesc(rel, pdir->omit_detached);
432 Assert(pde->pd != NULL);
434 return pde->pd;
438 * DestroyPartitionDirectory
439 * Destroy a partition directory.
441 * Release the reference counts we're holding.
443 void
444 DestroyPartitionDirectory(PartitionDirectory pdir)
446 HASH_SEQ_STATUS status;
447 PartitionDirectoryEntry *pde;
449 hash_seq_init(&status, pdir->pdir_hash);
450 while ((pde = hash_seq_search(&status)) != NULL)
451 RelationDecrementReferenceCount(pde->rel);
455 * get_default_oid_from_partdesc
457 * Given a partition descriptor, return the OID of the default partition, if
458 * one exists; else, return InvalidOid.
461 get_default_oid_from_partdesc(PartitionDesc partdesc)
463 if (partdesc && partdesc->boundinfo &&
464 partition_bound_has_default(partdesc->boundinfo))
465 return partdesc->oids[partdesc->boundinfo->default_index];
467 return InvalidOid;