Unmark gen_random_uuid() function leakproof.
[pgsql.git] / src / include / storage / proc.h
blob5a3dd5d2d40b46a60de6a74c6e4f0d713f16f91d
/*-------------------------------------------------------------------------
 *
 * proc.h
 *	  per-process shared memory data structures
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/storage/proc.h
 *
 *-------------------------------------------------------------------------
 */
14 #ifndef _PROC_H_
15 #define _PROC_H_
17 #include "access/clog.h"
18 #include "access/xlogdefs.h"
19 #include "lib/ilist.h"
20 #include "storage/latch.h"
21 #include "storage/lock.h"
22 #include "storage/pg_sema.h"
23 #include "storage/proclist_types.h"
24 #include "storage/procnumber.h"
/*
 * Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds
 * for non-aborted subtransactions of its current top transaction.  These
 * have to be treated as running XIDs by other backends.
 *
 * We also keep track of whether the cache overflowed (ie, the transaction has
 * generated at least one subtransaction that didn't fit in the cache).
 * If none of the caches have overflowed, we can assume that an XID that's not
 * listed anywhere in the PGPROC array is not a running transaction.  Else we
 * have to look at pg_subtrans.
 *
 * The number of cached subxids is stored in XidCacheStatus.count, which is a
 * uint8, so this value must remain representable in 8 bits.
 *
 * See src/test/isolation/specs/subxid-overflow.spec if you change this.
 */
#define PGPROC_MAX_CACHED_SUBXIDS 64	/* XXX guessed-at value */
41 typedef struct XidCacheStatus
43 /* number of cached subxids, never more than PGPROC_MAX_CACHED_SUBXIDS */
44 uint8 count;
45 /* has PGPROC->subxids overflowed */
46 bool overflowed;
47 } XidCacheStatus;
49 struct XidCache
51 TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS];
/*
 * Flags for PGPROC->statusFlags and PROC_HDR->statusFlags[]
 *
 * statusFlags is a uint8, so every flag must fit in the low 8 bits.
 */

/* is it an autovac worker? */
#define		PROC_IS_AUTOVACUUM			0x01

/* currently running lazy vacuum */
#define		PROC_IN_VACUUM				0x02

/*
 * currently running CREATE INDEX CONCURRENTLY or REINDEX CONCURRENTLY on
 * non-expressional, non-partial index
 */
#define		PROC_IN_SAFE_IC				0x04

/* set by autovac only */
#define		PROC_VACUUM_FOR_WRAPAROUND	0x08

/* currently doing logical decoding outside xact */
#define		PROC_IN_LOGICAL_DECODING	0x10

/* this proc's xmin must be included in vacuum horizons in all databases */
#define		PROC_AFFECTS_ALL_HORIZONS	0x20

/* flags reset at EOXact */
#define		PROC_VACUUM_STATE_MASK \
	(PROC_IN_VACUUM | PROC_IN_SAFE_IC | PROC_VACUUM_FOR_WRAPAROUND)

/*
 * Xmin-related flags. Make sure any flags that affect how the process' Xmin
 * value is interpreted by VACUUM are included here.
 */
#define		PROC_XMIN_FLAGS (PROC_IN_VACUUM | PROC_IN_SAFE_IC)
81 * We allow a limited number of "weak" relation locks (AccessShareLock,
82 * RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure
83 * (or rather in shared memory referenced from PGPROC) rather than the main
84 * lock table. This eases contention on the lock manager LWLocks. See
85 * storage/lmgr/README for additional details.
87 extern PGDLLIMPORT int FastPathLockGroupsPerBackend;
#define		FP_LOCK_GROUPS_PER_BACKEND_MAX	1024
#define		FP_LOCK_SLOTS_PER_GROUP		16	/* don't change */

/*
 * Total number of fast-path slots per backend.  This expands to an
 * expression that reads FastPathLockGroupsPerBackend, so it is a run-time
 * value, not a compile-time constant.
 */
#define		FP_LOCK_SLOTS_PER_BACKEND	(FP_LOCK_SLOTS_PER_GROUP * FastPathLockGroupsPerBackend)
/*
 * Flags for PGPROC.delayChkptFlags
 *
 * These flags can be used to delay the start or completion of a checkpoint
 * for short periods. A flag is in effect if the corresponding bit is set in
 * the PGPROC of any backend.
 *
 * For our purposes here, a checkpoint has three phases: (1) determine the
 * location to which the redo pointer will be moved, (2) write all the
 * data durably to disk, and (3) WAL-log the checkpoint.
 *
 * Setting DELAY_CHKPT_START prevents the system from moving from phase 1
 * to phase 2. This is useful when we are performing a WAL-logged modification
 * of data that will be flushed to disk in phase 2. By setting this flag
 * before writing WAL and clearing it after we've both written WAL and
 * performed the corresponding modification, we ensure that if the WAL record
 * is inserted prior to the new redo point, the corresponding data changes will
 * also be flushed to disk before the checkpoint can complete. (In the
 * extremely common case where the data being modified is in shared buffers
 * and we acquire an exclusive content lock on the relevant buffers before
 * writing WAL, this mechanism is not needed, because phase 2 will block
 * until we release the content lock and then flush the modified data to
 * disk.)
 *
 * Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2
 * to phase 3. This is useful if we are performing a WAL-logged operation that
 * might invalidate buffers, such as relation truncation. In this case, we need
 * to ensure that any buffers which were invalidated and thus not flushed by
 * the checkpoint are actually destroyed on disk. Replay can cope with a file
 * or block that doesn't exist, but not with a block that has the wrong
 * contents.
 */
#define DELAY_CHKPT_START		(1<<0)
#define DELAY_CHKPT_COMPLETE	(1<<1)
/*
 * Wait status of a process; stored in PGPROC->waitStatus and returned by
 * JoinWaitQueue() and ProcSleep().
 */
typedef enum
{
	PROC_WAIT_STATUS_OK,
	PROC_WAIT_STATUS_WAITING,
	PROC_WAIT_STATUS_ERROR,
} ProcWaitStatus;
136 * Each backend has a PGPROC struct in shared memory. There is also a list of
137 * currently-unused PGPROC structs that will be reallocated to new backends.
139 * links: list link for any list the PGPROC is in. When waiting for a lock,
140 * the PGPROC is linked into that lock's waitProcs queue. A recycled PGPROC
141 * is linked into ProcGlobal's freeProcs list.
143 * Note: twophase.c also sets up a dummy PGPROC struct for each currently
144 * prepared transaction. These PGPROCs appear in the ProcArray data structure
145 * so that the prepared transactions appear to be still running and are
146 * correctly shown as holding locks. A prepared transaction PGPROC can be
147 * distinguished from a real one at need by the fact that it has pid == 0.
148 * The semaphore and lock-activity fields in a prepared-xact PGPROC are unused,
149 * but its myProcLocks[] lists are valid.
151 * We allow many fields of this struct to be accessed without locks, such as
152 * delayChkptFlags and isBackgroundWorker. However, keep in mind that writing
153 * mirrored ones (see below) requires holding ProcArrayLock or XidGenLock in
154 * at least shared mode, so that pgxactoff does not change concurrently.
156 * Mirrored fields:
158 * Some fields in PGPROC (see "mirrored in ..." comment) are mirrored into an
159 * element of more densely packed ProcGlobal arrays. These arrays are indexed
160 * by PGPROC->pgxactoff. Both copies need to be maintained coherently.
162 * NB: The pgxactoff indexed value can *never* be accessed without holding
163 * locks.
165 * See PROC_HDR for details.
167 struct PGPROC
169 dlist_node links; /* list link if process is in a list */
170 dlist_head *procgloballist; /* procglobal list that owns this PGPROC */
172 PGSemaphore sem; /* ONE semaphore to sleep on */
173 ProcWaitStatus waitStatus;
175 Latch procLatch; /* generic latch for process */
178 TransactionId xid; /* id of top-level transaction currently being
179 * executed by this proc, if running and XID
180 * is assigned; else InvalidTransactionId.
181 * mirrored in ProcGlobal->xids[pgxactoff] */
183 TransactionId xmin; /* minimal running XID as it was when we were
184 * starting our xact, excluding LAZY VACUUM:
185 * vacuum must not remove tuples deleted by
186 * xid >= xmin ! */
188 int pid; /* Backend's process ID; 0 if prepared xact */
190 int pgxactoff; /* offset into various ProcGlobal->arrays with
191 * data mirrored from this PGPROC */
194 * Currently running top-level transaction's virtual xid. Together these
195 * form a VirtualTransactionId, but we don't use that struct because this
196 * is not atomically assignable as whole, and we want to enforce code to
197 * consider both parts separately. See comments at VirtualTransactionId.
199 struct
201 ProcNumber procNumber; /* For regular backends, equal to
202 * GetNumberFromPGProc(proc). For prepared
203 * xacts, ID of the original backend that
204 * processed the transaction. For unused
205 * PGPROC entries, INVALID_PROC_NUMBER. */
206 LocalTransactionId lxid; /* local id of top-level transaction
207 * currently * being executed by this
208 * proc, if running; else
209 * InvalidLocalTransactionId */
210 } vxid;
212 /* These fields are zero while a backend is still starting up: */
213 Oid databaseId; /* OID of database this backend is using */
214 Oid roleId; /* OID of role using this backend */
216 Oid tempNamespaceId; /* OID of temp schema this backend is
217 * using */
219 bool isBackgroundWorker; /* true if background worker. */
222 * While in hot standby mode, shows that a conflict signal has been sent
223 * for the current transaction. Set/cleared while holding ProcArrayLock,
224 * though not required. Accessed without lock, if needed.
226 bool recoveryConflictPending;
228 /* Info about LWLock the process is currently waiting for, if any. */
229 uint8 lwWaiting; /* see LWLockWaitState */
230 uint8 lwWaitMode; /* lwlock mode being waited for */
231 proclist_node lwWaitLink; /* position in LW lock wait list */
233 /* Support for condition variables. */
234 proclist_node cvWaitLink; /* position in CV wait list */
236 /* Info about lock the process is currently waiting for, if any. */
237 /* waitLock and waitProcLock are NULL if not currently waiting. */
238 LOCK *waitLock; /* Lock object we're sleeping on ... */
239 PROCLOCK *waitProcLock; /* Per-holder info for awaited lock */
240 LOCKMODE waitLockMode; /* type of lock we're waiting for */
241 LOCKMASK heldLocks; /* bitmask for lock types already held on this
242 * lock object by this backend */
243 pg_atomic_uint64 waitStart; /* time at which wait for lock acquisition
244 * started */
246 int delayChkptFlags; /* for DELAY_CHKPT_* flags */
248 uint8 statusFlags; /* this backend's status flags, see PROC_*
249 * above. mirrored in
250 * ProcGlobal->statusFlags[pgxactoff] */
253 * Info to allow us to wait for synchronous replication, if needed.
254 * waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend.
255 * syncRepState must not be touched except by owning process or WALSender.
256 * syncRepLinks used only while holding SyncRepLock.
258 XLogRecPtr waitLSN; /* waiting for this LSN or higher */
259 int syncRepState; /* wait state for sync rep */
260 dlist_node syncRepLinks; /* list link if process is in syncrep queue */
263 * All PROCLOCK objects for locks held or awaited by this backend are
264 * linked into one of these lists, according to the partition number of
265 * their lock.
267 dlist_head myProcLocks[NUM_LOCK_PARTITIONS];
269 XidCacheStatus subxidStatus; /* mirrored with
270 * ProcGlobal->subxidStates[i] */
271 struct XidCache subxids; /* cache for subtransaction XIDs */
273 /* Support for group XID clearing. */
274 /* true, if member of ProcArray group waiting for XID clear */
275 bool procArrayGroupMember;
276 /* next ProcArray group member waiting for XID clear */
277 pg_atomic_uint32 procArrayGroupNext;
280 * latest transaction id among the transaction's main XID and
281 * subtransactions
283 TransactionId procArrayGroupMemberXid;
285 uint32 wait_event_info; /* proc's wait information */
287 /* Support for group transaction status update. */
288 bool clogGroupMember; /* true, if member of clog group */
289 pg_atomic_uint32 clogGroupNext; /* next clog group member */
290 TransactionId clogGroupMemberXid; /* transaction id of clog group member */
291 XidStatus clogGroupMemberXidStatus; /* transaction status of clog
292 * group member */
293 int64 clogGroupMemberPage; /* clog page corresponding to
294 * transaction id of clog group member */
295 XLogRecPtr clogGroupMemberLsn; /* WAL location of commit record for clog
296 * group member */
298 /* Lock manager data, recording fast-path locks taken by this backend. */
299 LWLock fpInfoLock; /* protects per-backend fast-path state */
300 uint64 *fpLockBits; /* lock modes held for each fast-path slot */
301 Oid *fpRelId; /* slots for rel oids */
302 bool fpVXIDLock; /* are we holding a fast-path VXID lock? */
303 LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID
304 * lock */
307 * Support for lock groups. Use LockHashPartitionLockByProc on the group
308 * leader to get the LWLock protecting these fields.
310 PGPROC *lockGroupLeader; /* lock group leader, if I'm a member */
311 dlist_head lockGroupMembers; /* list of members, if I'm a leader */
312 dlist_node lockGroupLink; /* my member link, if I'm a member */
315 /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
318 extern PGDLLIMPORT PGPROC *MyProc;
320 /* Proc number of this backend. Equal to GetNumberFromPGProc(MyProc). */
321 extern PGDLLIMPORT ProcNumber MyProcNumber;
323 /* Our parallel session leader, or INVALID_PROC_NUMBER if none */
324 extern PGDLLIMPORT ProcNumber ParallelLeaderProcNumber;
/*
 * The proc number to use for our session's temp relations is normally our own,
 * but parallel workers should use their leader's ID.
 *
 * Evaluates to MyProcNumber when ParallelLeaderProcNumber is
 * INVALID_PROC_NUMBER, else to ParallelLeaderProcNumber.
 */
#define ProcNumberForTempRelations() \
	(ParallelLeaderProcNumber == INVALID_PROC_NUMBER ? MyProcNumber : ParallelLeaderProcNumber)
334 * There is one ProcGlobal struct for the whole database cluster.
336 * Adding/Removing an entry into the procarray requires holding *both*
337 * ProcArrayLock and XidGenLock in exclusive mode (in that order). Both are
338 * needed because the dense arrays (see below) are accessed from
339 * GetNewTransactionId() and GetSnapshotData(), and we don't want to add
340 * further contention by both using the same lock. Adding/Removing a procarray
341 * entry is much less frequent.
343 * Some fields in PGPROC are mirrored into more densely packed arrays (e.g.
344 * xids), with one entry for each backend. These arrays only contain entries
345 * for PGPROCs that have been added to the shared array with ProcArrayAdd()
346 * (in contrast to PGPROC array which has unused PGPROCs interspersed).
348 * The dense arrays are indexed by PGPROC->pgxactoff. Any concurrent
349 * ProcArrayAdd() / ProcArrayRemove() can lead to pgxactoff of a procarray
350 * member to change. Therefore it is only safe to use PGPROC->pgxactoff to
351 * access the dense array while holding either ProcArrayLock or XidGenLock.
353 * As long as a PGPROC is in the procarray, the mirrored values need to be
354 * maintained in both places in a coherent manner.
356 * The denser separate arrays are beneficial for three main reasons: First, to
357 * allow for as tight loops accessing the data as possible. Second, to prevent
358 * updates of frequently changing data (e.g. xmin) from invalidating
359 * cachelines also containing less frequently changing data (e.g. xid,
360 * statusFlags). Third to condense frequently accessed data into as few
361 * cachelines as possible.
363 * There are two main reasons to have the data mirrored between these dense
364 * arrays and PGPROC. First, as explained above, a PGPROC's array entries can
365 * only be accessed with either ProcArrayLock or XidGenLock held, whereas the
366 * PGPROC entries do not require that (obviously there may still be locking
367 * requirements around the individual field, separate from the concerns
368 * here). That is particularly important for a backend to efficiently checks
369 * it own values, which it often can safely do without locking. Second, the
370 * PGPROC fields allow to avoid unnecessary accesses and modification to the
371 * dense arrays. A backend's own PGPROC is more likely to be in a local cache,
372 * whereas the cachelines for the dense array will be modified by other
373 * backends (often removing it from the cache for other cores/sockets). At
374 * commit/abort time a check of the PGPROC value can avoid accessing/dirtying
375 * the corresponding array value.
377 * Basically it makes sense to access the PGPROC variable when checking a
378 * single backend's data, especially when already looking at the PGPROC for
379 * other reasons already. It makes sense to look at the "dense" arrays if we
380 * need to look at many / most entries, because we then benefit from the
381 * reduced indirection and better cross-process cache-ability.
383 * When entering a PGPROC for 2PC transactions with ProcArrayAdd(), the data
384 * in the dense arrays is initialized from the PGPROC while it already holds
385 * ProcArrayLock.
387 typedef struct PROC_HDR
389 /* Array of PGPROC structures (not including dummies for prepared txns) */
390 PGPROC *allProcs;
392 /* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */
393 TransactionId *xids;
396 * Array mirroring PGPROC.subxidStatus for each PGPROC currently in the
397 * procarray.
399 XidCacheStatus *subxidStates;
402 * Array mirroring PGPROC.statusFlags for each PGPROC currently in the
403 * procarray.
405 uint8 *statusFlags;
407 /* Length of allProcs array */
408 uint32 allProcCount;
409 /* Head of list of free PGPROC structures */
410 dlist_head freeProcs;
411 /* Head of list of autovacuum's free PGPROC structures */
412 dlist_head autovacFreeProcs;
413 /* Head of list of bgworker free PGPROC structures */
414 dlist_head bgworkerFreeProcs;
415 /* Head of list of walsender free PGPROC structures */
416 dlist_head walsenderFreeProcs;
417 /* First pgproc waiting for group XID clear */
418 pg_atomic_uint32 procArrayGroupFirst;
419 /* First pgproc waiting for group transaction status update */
420 pg_atomic_uint32 clogGroupFirst;
423 * Current slot numbers of some auxiliary processes. There can be only one
424 * of each of these running at a time.
426 ProcNumber walwriterProc;
427 ProcNumber checkpointerProc;
429 /* Current shared estimate of appropriate spins_per_delay value */
430 int spins_per_delay;
431 /* Buffer id of the buffer that Startup process waits for pin on, or -1 */
432 int startupBufferPinWaitBufId;
433 } PROC_HDR;
435 extern PGDLLIMPORT PROC_HDR *ProcGlobal;
437 extern PGDLLIMPORT PGPROC *PreparedXactProcs;
/*
 * Accessors for getting PGPROC given a ProcNumber and vice versa.
 *
 * Both index relative to ProcGlobal->allProcs; neither checks that the
 * number or pointer actually lies within that array.
 */
#define GetPGProcByNumber(n) (&ProcGlobal->allProcs[(n)])
#define GetNumberFromPGProc(proc) ((proc) - &ProcGlobal->allProcs[0])
/*
 * We set aside some extra PGPROC structures for auxiliary processes,
 * ie things that aren't full-fledged backends but need shmem access.
 *
 * Background writer, checkpointer, WAL writer, WAL summarizer, and archiver
 * run during normal operation.  Startup process and WAL receiver also consume
 * 2 slots, but WAL writer is launched only after startup has exited, so we
 * only need 6 slots.
 */
#define NUM_AUXILIARY_PROCS		6
456 /* configurable options */
457 extern PGDLLIMPORT int DeadlockTimeout;
458 extern PGDLLIMPORT int StatementTimeout;
459 extern PGDLLIMPORT int LockTimeout;
460 extern PGDLLIMPORT int IdleInTransactionSessionTimeout;
461 extern PGDLLIMPORT int TransactionTimeout;
462 extern PGDLLIMPORT int IdleSessionTimeout;
463 extern PGDLLIMPORT bool log_lock_waits;
/* Declared here only when EXEC_BACKEND is defined. */
#ifdef EXEC_BACKEND
extern PGDLLIMPORT slock_t *ProcStructLock;
extern PGDLLIMPORT PGPROC *AuxiliaryProcs;
#endif
472 * Function Prototypes
474 extern int ProcGlobalSemas(void);
475 extern Size ProcGlobalShmemSize(void);
476 extern void InitProcGlobal(void);
477 extern void InitProcess(void);
478 extern void InitProcessPhase2(void);
479 extern void InitAuxiliaryProcess(void);
481 extern void SetStartupBufferPinWaitBufId(int bufid);
482 extern int GetStartupBufferPinWaitBufId(void);
484 extern bool HaveNFreeProcs(int n, int *nfree);
485 extern void ProcReleaseLocks(bool isCommit);
487 extern ProcWaitStatus JoinWaitQueue(LOCALLOCK *locallock,
488 LockMethod lockMethodTable, bool dontWait);
489 extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock);
490 extern void ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus);
491 extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
492 extern void CheckDeadLockAlert(void);
493 extern void LockErrorCleanup(void);
495 extern void ProcWaitForSignal(uint32 wait_event_info);
496 extern void ProcSendSignal(ProcNumber procNumber);
498 extern PGPROC *AuxiliaryPidGetProc(int pid);
500 extern void BecomeLockGroupLeader(void);
501 extern bool BecomeLockGroupMember(PGPROC *leader, int pid);
503 #endif /* _PROC_H_ */