Standardize rmgrdesc recovery conflict XID output.
[pgsql.git] / src / backend / storage / ipc / standby.c
blob f43229dfda610b0f3e0001944567bcc9d09d15b4
1 /*-------------------------------------------------------------------------
3 * standby.c
4 * Misc functions used in Hot Standby mode.
6 * All functions for handling RM_STANDBY_ID, which relate to
7 * AccessExclusiveLocks and starting snapshots for Hot Standby mode.
8 * Plus conflict recovery processing.
10 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
13 * IDENTIFICATION
14 * src/backend/storage/ipc/standby.c
16 *-------------------------------------------------------------------------
18 #include "postgres.h"
19 #include "access/transam.h"
20 #include "access/twophase.h"
21 #include "access/xact.h"
22 #include "access/xloginsert.h"
23 #include "access/xlogrecovery.h"
24 #include "access/xlogutils.h"
25 #include "miscadmin.h"
26 #include "pgstat.h"
27 #include "storage/bufmgr.h"
28 #include "storage/lmgr.h"
29 #include "storage/proc.h"
30 #include "storage/procarray.h"
31 #include "storage/sinvaladt.h"
32 #include "storage/standby.h"
33 #include "utils/hsearch.h"
34 #include "utils/memutils.h"
35 #include "utils/ps_status.h"
36 #include "utils/timeout.h"
37 #include "utils/timestamp.h"
39 /* User-settable GUC parameters */
40 int vacuum_defer_cleanup_age;
41 int max_standby_archive_delay = 30 * 1000;
42 int max_standby_streaming_delay = 30 * 1000;
43 bool log_recovery_conflict_waits = false;
46 * Keep track of all the exclusive locks owned by original transactions.
47 * For each known exclusive lock, there is a RecoveryLockEntry in the
48 * RecoveryLockHash hash table. All RecoveryLockEntrys belonging to a
49 * given XID are chained together so that we can find them easily.
50 * For each original transaction that is known to have any such locks,
51 * there is a RecoveryLockXidEntry in the RecoveryLockXidHash hash table,
52 * which stores the head of the chain of its locks.
54 typedef struct RecoveryLockEntry
56 xl_standby_lock key; /* hash key: xid, dbOid, relOid */
57 struct RecoveryLockEntry *next; /* chain link */
58 } RecoveryLockEntry;
60 typedef struct RecoveryLockXidEntry
62 TransactionId xid; /* hash key -- must be first */
63 struct RecoveryLockEntry *head; /* chain head */
64 } RecoveryLockXidEntry;
66 static HTAB *RecoveryLockHash = NULL;
67 static HTAB *RecoveryLockXidHash = NULL;
69 /* Flags set by timeout handlers */
70 static volatile sig_atomic_t got_standby_deadlock_timeout = false;
71 static volatile sig_atomic_t got_standby_delay_timeout = false;
72 static volatile sig_atomic_t got_standby_lock_timeout = false;
74 static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
75 ProcSignalReason reason,
76 uint32 wait_event_info,
77 bool report_waiting);
78 static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
79 static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
80 static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
81 static const char *get_recovery_conflict_desc(ProcSignalReason reason);
84 * InitRecoveryTransactionEnvironment
85 * Initialize tracking of our primary's in-progress transactions.
87 * We need to issue shared invalidations and hold locks. Holding locks
88 * means others may want to wait on us, so we need to make a lock table
89 * vxact entry like a real transaction. We could create and delete
90 * lock table entries for each transaction but its simpler just to create
91 * one permanent entry and leave it there all the time. Locks are then
92 * acquired and released as needed. Yes, this means you can see the
93 * Startup process in pg_locks once we have run this.
95 void
96 InitRecoveryTransactionEnvironment(void)
98 VirtualTransactionId vxid;
99 HASHCTL hash_ctl;
101 Assert(RecoveryLockHash == NULL); /* don't run this twice */
104 * Initialize the hash tables for tracking the locks held by each
105 * transaction.
107 hash_ctl.keysize = sizeof(xl_standby_lock);
108 hash_ctl.entrysize = sizeof(RecoveryLockEntry);
109 RecoveryLockHash = hash_create("RecoveryLockHash",
111 &hash_ctl,
112 HASH_ELEM | HASH_BLOBS);
113 hash_ctl.keysize = sizeof(TransactionId);
114 hash_ctl.entrysize = sizeof(RecoveryLockXidEntry);
115 RecoveryLockXidHash = hash_create("RecoveryLockXidHash",
117 &hash_ctl,
118 HASH_ELEM | HASH_BLOBS);
121 * Initialize shared invalidation management for Startup process, being
122 * careful to register ourselves as a sendOnly process so we don't need to
123 * read messages, nor will we get signaled when the queue starts filling
124 * up.
126 SharedInvalBackendInit(true);
129 * Lock a virtual transaction id for Startup process.
131 * We need to do GetNextLocalTransactionId() because
132 * SharedInvalBackendInit() leaves localTransactionId invalid and the lock
133 * manager doesn't like that at all.
135 * Note that we don't need to run XactLockTableInsert() because nobody
136 * needs to wait on xids. That sounds a little strange, but table locks
137 * are held by vxids and row level locks are held by xids. All queries
138 * hold AccessShareLocks so never block while we write or lock new rows.
140 vxid.backendId = MyBackendId;
141 vxid.localTransactionId = GetNextLocalTransactionId();
142 VirtualXactLockTableInsert(vxid);
144 standbyState = STANDBY_INITIALIZED;
148 * ShutdownRecoveryTransactionEnvironment
149 * Shut down transaction tracking
151 * Prepare to switch from hot standby mode to normal operation. Shut down
152 * recovery-time transaction tracking.
154 * This must be called even in shutdown of startup process if transaction
155 * tracking has been initialized. Otherwise some locks the tracked
156 * transactions were holding will not be released and may interfere with
157 * the processes still running (but will exit soon later) at the exit of
158 * startup process.
160 void
161 ShutdownRecoveryTransactionEnvironment(void)
164 * Do nothing if RecoveryLockHash is NULL because that means that
165 * transaction tracking has not yet been initialized or has already been
166 * shut down. This makes it safe to have possibly-redundant calls of this
167 * function during process exit.
169 if (RecoveryLockHash == NULL)
170 return;
172 /* Mark all tracked in-progress transactions as finished. */
173 ExpireAllKnownAssignedTransactionIds();
175 /* Release all locks the tracked transactions were holding */
176 StandbyReleaseAllLocks();
178 /* Destroy the lock hash tables. */
179 hash_destroy(RecoveryLockHash);
180 hash_destroy(RecoveryLockXidHash);
181 RecoveryLockHash = NULL;
182 RecoveryLockXidHash = NULL;
184 /* Cleanup our VirtualTransaction */
185 VirtualXactLockTableCleanup();
190 * -----------------------------------------------------
191 * Standby wait timers and backend cancel logic
192 * -----------------------------------------------------
196 * Determine the cutoff time at which we want to start canceling conflicting
197 * transactions. Returns zero (a time safely in the past) if we are willing
198 * to wait forever.
200 static TimestampTz
201 GetStandbyLimitTime(void)
203 TimestampTz rtime;
204 bool fromStream;
207 * The cutoff time is the last WAL data receipt time plus the appropriate
208 * delay variable. Delay of -1 means wait forever.
210 GetXLogReceiptTime(&rtime, &fromStream);
211 if (fromStream)
213 if (max_standby_streaming_delay < 0)
214 return 0; /* wait forever */
215 return TimestampTzPlusMilliseconds(rtime, max_standby_streaming_delay);
217 else
219 if (max_standby_archive_delay < 0)
220 return 0; /* wait forever */
221 return TimestampTzPlusMilliseconds(rtime, max_standby_archive_delay);
#define STANDBY_INITIAL_WAIT_US  1000
static int	standbyWait_us = STANDBY_INITIAL_WAIT_US;
229 * Standby wait logic for ResolveRecoveryConflictWithVirtualXIDs.
230 * We wait here for a while then return. If we decide we can't wait any
231 * more then we return true, if we can wait some more return false.
233 static bool
234 WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
236 TimestampTz ltime;
238 CHECK_FOR_INTERRUPTS();
240 /* Are we past the limit time? */
241 ltime = GetStandbyLimitTime();
242 if (ltime && GetCurrentTimestamp() >= ltime)
243 return true;
246 * Sleep a bit (this is essential to avoid busy-waiting).
248 pgstat_report_wait_start(wait_event_info);
249 pg_usleep(standbyWait_us);
250 pgstat_report_wait_end();
253 * Progressively increase the sleep times, but not to more than 1s, since
254 * pg_usleep isn't interruptible on some platforms.
256 standbyWait_us *= 2;
257 if (standbyWait_us > 1000000)
258 standbyWait_us = 1000000;
260 return false;
264 * Log the recovery conflict.
266 * wait_start is the timestamp when the caller started to wait.
267 * now is the timestamp when this function has been called.
268 * wait_list is the list of virtual transaction ids assigned to
269 * conflicting processes. still_waiting indicates whether
270 * the startup process is still waiting for the recovery conflict
271 * to be resolved or not.
273 void
274 LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
275 TimestampTz now, VirtualTransactionId *wait_list,
276 bool still_waiting)
278 long secs;
279 int usecs;
280 long msecs;
281 StringInfoData buf;
282 int nprocs = 0;
285 * There must be no conflicting processes when the recovery conflict has
286 * already been resolved.
288 Assert(still_waiting || wait_list == NULL);
290 TimestampDifference(wait_start, now, &secs, &usecs);
291 msecs = secs * 1000 + usecs / 1000;
292 usecs = usecs % 1000;
294 if (wait_list)
296 VirtualTransactionId *vxids;
298 /* Construct a string of list of the conflicting processes */
299 vxids = wait_list;
300 while (VirtualTransactionIdIsValid(*vxids))
302 PGPROC *proc = BackendIdGetProc(vxids->backendId);
304 /* proc can be NULL if the target backend is not active */
305 if (proc)
307 if (nprocs == 0)
309 initStringInfo(&buf);
310 appendStringInfo(&buf, "%d", proc->pid);
312 else
313 appendStringInfo(&buf, ", %d", proc->pid);
315 nprocs++;
318 vxids++;
323 * If wait_list is specified, report the list of PIDs of active
324 * conflicting backends in a detail message. Note that if all the backends
325 * in the list are not active, no detail message is logged.
327 if (still_waiting)
329 ereport(LOG,
330 errmsg("recovery still waiting after %ld.%03d ms: %s",
331 msecs, usecs, get_recovery_conflict_desc(reason)),
332 nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
333 "Conflicting processes: %s.",
334 nprocs, buf.data) : 0);
336 else
338 ereport(LOG,
339 errmsg("recovery finished waiting after %ld.%03d ms: %s",
340 msecs, usecs, get_recovery_conflict_desc(reason)));
343 if (nprocs > 0)
344 pfree(buf.data);
348 * This is the main executioner for any query backend that conflicts with
349 * recovery processing. Judgement has already been passed on it within
350 * a specific rmgr. Here we just issue the orders to the procs. The procs
351 * then throw the required error as instructed.
353 * If report_waiting is true, "waiting" is reported in PS display and the
354 * wait for recovery conflict is reported in the log, if necessary. If
355 * the caller is responsible for reporting them, report_waiting should be
356 * false. Otherwise, both the caller and this function report the same
357 * thing unexpectedly.
359 static void
360 ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
361 ProcSignalReason reason, uint32 wait_event_info,
362 bool report_waiting)
364 TimestampTz waitStart = 0;
365 char *new_status = NULL;
366 bool logged_recovery_conflict = false;
368 /* Fast exit, to avoid a kernel call if there's no work to be done. */
369 if (!VirtualTransactionIdIsValid(*waitlist))
370 return;
372 /* Set the wait start timestamp for reporting */
373 if (report_waiting && (log_recovery_conflict_waits || update_process_title))
374 waitStart = GetCurrentTimestamp();
376 while (VirtualTransactionIdIsValid(*waitlist))
378 /* reset standbyWait_us for each xact we wait for */
379 standbyWait_us = STANDBY_INITIAL_WAIT_US;
381 /* wait until the virtual xid is gone */
382 while (!VirtualXactLock(*waitlist, false))
384 /* Is it time to kill it? */
385 if (WaitExceedsMaxStandbyDelay(wait_event_info))
387 pid_t pid;
390 * Now find out who to throw out of the balloon.
392 Assert(VirtualTransactionIdIsValid(*waitlist));
393 pid = CancelVirtualTransaction(*waitlist, reason);
396 * Wait a little bit for it to die so that we avoid flooding
397 * an unresponsive backend when system is heavily loaded.
399 if (pid != 0)
400 pg_usleep(5000L);
403 if (waitStart != 0 && (!logged_recovery_conflict || new_status == NULL))
405 TimestampTz now = 0;
406 bool maybe_log_conflict;
407 bool maybe_update_title;
409 maybe_log_conflict = (log_recovery_conflict_waits && !logged_recovery_conflict);
410 maybe_update_title = (update_process_title && new_status == NULL);
412 /* Get the current timestamp if not report yet */
413 if (maybe_log_conflict || maybe_update_title)
414 now = GetCurrentTimestamp();
417 * Report via ps if we have been waiting for more than 500
418 * msec (should that be configurable?)
420 if (maybe_update_title &&
421 TimestampDifferenceExceeds(waitStart, now, 500))
423 const char *old_status;
424 int len;
426 old_status = get_ps_display(&len);
427 new_status = (char *) palloc(len + 8 + 1);
428 memcpy(new_status, old_status, len);
429 strcpy(new_status + len, " waiting");
430 set_ps_display(new_status);
431 new_status[len] = '\0'; /* truncate off " waiting" */
435 * Emit the log message if the startup process is waiting
436 * longer than deadlock_timeout for recovery conflict.
438 if (maybe_log_conflict &&
439 TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout))
441 LogRecoveryConflict(reason, waitStart, now, waitlist, true);
442 logged_recovery_conflict = true;
447 /* The virtual transaction is gone now, wait for the next one */
448 waitlist++;
452 * Emit the log message if recovery conflict was resolved but the startup
453 * process waited longer than deadlock_timeout for it.
455 if (logged_recovery_conflict)
456 LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
457 NULL, false);
459 /* Reset ps display if we changed it */
460 if (new_status)
462 set_ps_display(new_status);
463 pfree(new_status);
468 * Generate whatever recovery conflicts are needed to eliminate snapshots that
469 * might see XIDs <= snapshotConflictHorizon as still running.
471 * snapshotConflictHorizon cutoffs are our standard approach to generating
472 * granular recovery conflicts. Note that InvalidTransactionId values are
473 * interpreted as "definitely don't need any conflicts" here, which is a
474 * general convention that WAL records can (and often do) depend on.
476 void
477 ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon,
478 RelFileLocator locator)
480 VirtualTransactionId *backends;
483 * If we get passed InvalidTransactionId then we do nothing (no conflict).
485 * This can happen when replaying already-applied WAL records after a
486 * standby crash or restart, or when replaying an XLOG_HEAP2_VISIBLE
487 * record that marks as frozen a page which was already all-visible. It's
488 * also quite common with records generated during index deletion
489 * (original execution of the deletion can reason that a recovery conflict
490 * which is sufficient for the deletion operation must take place before
491 * replay of the deletion record itself).
493 if (!TransactionIdIsValid(snapshotConflictHorizon))
494 return;
496 backends = GetConflictingVirtualXIDs(snapshotConflictHorizon,
497 locator.dbOid);
498 ResolveRecoveryConflictWithVirtualXIDs(backends,
499 PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
500 WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT,
501 true);
505 * Variant of ResolveRecoveryConflictWithSnapshot that works with
506 * FullTransactionId values
508 void
509 ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId snapshotConflictHorizon,
510 RelFileLocator locator)
513 * ResolveRecoveryConflictWithSnapshot operates on 32-bit TransactionIds,
514 * so truncate the logged FullTransactionId. If the logged value is very
515 * old, so that XID wrap-around already happened on it, there can't be any
516 * snapshots that still see it.
518 FullTransactionId nextXid = ReadNextFullTransactionId();
519 uint64 diff;
521 diff = U64FromFullTransactionId(nextXid) -
522 U64FromFullTransactionId(snapshotConflictHorizon);
523 if (diff < MaxTransactionId / 2)
525 TransactionId truncated;
527 truncated = XidFromFullTransactionId(snapshotConflictHorizon);
528 ResolveRecoveryConflictWithSnapshot(truncated, locator);
532 void
533 ResolveRecoveryConflictWithTablespace(Oid tsid)
535 VirtualTransactionId *temp_file_users;
538 * Standby users may be currently using this tablespace for their
539 * temporary files. We only care about current users because
540 * temp_tablespace parameter will just ignore tablespaces that no longer
541 * exist.
543 * Ask everybody to cancel their queries immediately so we can ensure no
544 * temp files remain and we can remove the tablespace. Nuke the entire
545 * site from orbit, it's the only way to be sure.
547 * XXX: We could work out the pids of active backends using this
548 * tablespace by examining the temp filenames in the directory. We would
549 * then convert the pids into VirtualXIDs before attempting to cancel
550 * them.
552 * We don't wait for commit because drop tablespace is non-transactional.
554 temp_file_users = GetConflictingVirtualXIDs(InvalidTransactionId,
555 InvalidOid);
556 ResolveRecoveryConflictWithVirtualXIDs(temp_file_users,
557 PROCSIG_RECOVERY_CONFLICT_TABLESPACE,
558 WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE,
559 true);
562 void
563 ResolveRecoveryConflictWithDatabase(Oid dbid)
566 * We don't do ResolveRecoveryConflictWithVirtualXIDs() here since that
567 * only waits for transactions and completely idle sessions would block
568 * us. This is rare enough that we do this as simply as possible: no wait,
569 * just force them off immediately.
571 * No locking is required here because we already acquired
572 * AccessExclusiveLock. Anybody trying to connect while we do this will
573 * block during InitPostgres() and then disconnect when they see the
574 * database has been removed.
576 while (CountDBBackends(dbid) > 0)
578 CancelDBBackends(dbid, PROCSIG_RECOVERY_CONFLICT_DATABASE, true);
581 * Wait awhile for them to die so that we avoid flooding an
582 * unresponsive backend when system is heavily loaded.
584 pg_usleep(10000);
589 * ResolveRecoveryConflictWithLock is called from ProcSleep()
590 * to resolve conflicts with other backends holding relation locks.
592 * The WaitLatch sleep normally done in ProcSleep()
593 * (when not InHotStandby) is performed here, for code clarity.
595 * We either resolve conflicts immediately or set a timeout to wake us at
596 * the limit of our patience.
598 * Resolve conflicts by canceling to all backends holding a conflicting
599 * lock. As we are already queued to be granted the lock, no new lock
600 * requests conflicting with ours will be granted in the meantime.
602 * We also must check for deadlocks involving the Startup process and
603 * hot-standby backend processes. If deadlock_timeout is reached in
604 * this function, all the backends holding the conflicting locks are
605 * requested to check themselves for deadlocks.
607 * logging_conflict should be true if the recovery conflict has not been
608 * logged yet even though logging is enabled. After deadlock_timeout is
609 * reached and the request for deadlock check is sent, we wait again to
610 * be signaled by the release of the lock if logging_conflict is false.
611 * Otherwise we return without waiting again so that the caller can report
612 * the recovery conflict. In this case, then, this function is called again
613 * with logging_conflict=false (because the recovery conflict has already
614 * been logged) and we will wait again for the lock to be released.
616 void
617 ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
619 TimestampTz ltime;
620 TimestampTz now;
622 Assert(InHotStandby);
624 ltime = GetStandbyLimitTime();
625 now = GetCurrentTimestamp();
628 * Update waitStart if first time through after the startup process
629 * started waiting for the lock. It should not be updated every time
630 * ResolveRecoveryConflictWithLock() is called during the wait.
632 * Use the current time obtained for comparison with ltime as waitStart
633 * (i.e., the time when this process started waiting for the lock). Since
634 * getting the current time newly can cause overhead, we reuse the
635 * already-obtained time to avoid that overhead.
637 * Note that waitStart is updated without holding the lock table's
638 * partition lock, to avoid the overhead by additional lock acquisition.
639 * This can cause "waitstart" in pg_locks to become NULL for a very short
640 * period of time after the wait started even though "granted" is false.
641 * This is OK in practice because we can assume that users are likely to
642 * look at "waitstart" when waiting for the lock for a long time.
644 if (pg_atomic_read_u64(&MyProc->waitStart) == 0)
645 pg_atomic_write_u64(&MyProc->waitStart, now);
647 if (now >= ltime && ltime != 0)
650 * We're already behind, so clear a path as quickly as possible.
652 VirtualTransactionId *backends;
654 backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
657 * Prevent ResolveRecoveryConflictWithVirtualXIDs() from reporting
658 * "waiting" in PS display by disabling its argument report_waiting
659 * because the caller, WaitOnLock(), has already reported that.
661 ResolveRecoveryConflictWithVirtualXIDs(backends,
662 PROCSIG_RECOVERY_CONFLICT_LOCK,
663 PG_WAIT_LOCK | locktag.locktag_type,
664 false);
666 else
669 * Wait (or wait again) until ltime, and check for deadlocks as well
670 * if we will be waiting longer than deadlock_timeout
672 EnableTimeoutParams timeouts[2];
673 int cnt = 0;
675 if (ltime != 0)
677 got_standby_lock_timeout = false;
678 timeouts[cnt].id = STANDBY_LOCK_TIMEOUT;
679 timeouts[cnt].type = TMPARAM_AT;
680 timeouts[cnt].fin_time = ltime;
681 cnt++;
684 got_standby_deadlock_timeout = false;
685 timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
686 timeouts[cnt].type = TMPARAM_AFTER;
687 timeouts[cnt].delay_ms = DeadlockTimeout;
688 cnt++;
690 enable_timeouts(timeouts, cnt);
693 /* Wait to be signaled by the release of the Relation Lock */
694 ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
697 * Exit if ltime is reached. Then all the backends holding conflicting
698 * locks will be canceled in the next ResolveRecoveryConflictWithLock()
699 * call.
701 if (got_standby_lock_timeout)
702 goto cleanup;
704 if (got_standby_deadlock_timeout)
706 VirtualTransactionId *backends;
708 backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
710 /* Quick exit if there's no work to be done */
711 if (!VirtualTransactionIdIsValid(*backends))
712 goto cleanup;
715 * Send signals to all the backends holding the conflicting locks, to
716 * ask them to check themselves for deadlocks.
718 while (VirtualTransactionIdIsValid(*backends))
720 SignalVirtualTransaction(*backends,
721 PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
722 false);
723 backends++;
727 * Exit if the recovery conflict has not been logged yet even though
728 * logging is enabled, so that the caller can log that. Then
729 * RecoveryConflictWithLock() is called again and we will wait again
730 * for the lock to be released.
732 if (logging_conflict)
733 goto cleanup;
736 * Wait again here to be signaled by the release of the Relation Lock,
737 * to prevent the subsequent RecoveryConflictWithLock() from causing
738 * deadlock_timeout and sending a request for deadlocks check again.
739 * Otherwise the request continues to be sent every deadlock_timeout
740 * until the relation locks are released or ltime is reached.
742 got_standby_deadlock_timeout = false;
743 ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
746 cleanup:
749 * Clear any timeout requests established above. We assume here that the
750 * Startup process doesn't have any other outstanding timeouts than those
751 * used by this function. If that stops being true, we could cancel the
752 * timeouts individually, but that'd be slower.
754 disable_all_timeouts(false);
755 got_standby_lock_timeout = false;
756 got_standby_deadlock_timeout = false;
760 * ResolveRecoveryConflictWithBufferPin is called from LockBufferForCleanup()
761 * to resolve conflicts with other backends holding buffer pins.
763 * The ProcWaitForSignal() sleep normally done in LockBufferForCleanup()
764 * (when not InHotStandby) is performed here, for code clarity.
766 * We either resolve conflicts immediately or set a timeout to wake us at
767 * the limit of our patience.
769 * Resolve conflicts by sending a PROCSIG signal to all backends to check if
770 * they hold one of the buffer pins that is blocking Startup process. If so,
771 * those backends will take an appropriate error action, ERROR or FATAL.
773 * We also must check for deadlocks. Deadlocks occur because if queries
774 * wait on a lock, that must be behind an AccessExclusiveLock, which can only
775 * be cleared if the Startup process replays a transaction completion record.
776 * If Startup process is also waiting then that is a deadlock. The deadlock
777 * can occur if the query is waiting and then the Startup sleeps, or if
778 * Startup is sleeping and the query waits on a lock. We protect against
779 * only the former sequence here, the latter sequence is checked prior to
780 * the query sleeping, in CheckRecoveryConflictDeadlock().
782 * Deadlocks are extremely rare, and relatively expensive to check for,
783 * so we don't do a deadlock check right away ... only if we have had to wait
784 * at least deadlock_timeout.
786 void
787 ResolveRecoveryConflictWithBufferPin(void)
789 TimestampTz ltime;
791 Assert(InHotStandby);
793 ltime = GetStandbyLimitTime();
795 if (GetCurrentTimestamp() >= ltime && ltime != 0)
798 * We're already behind, so clear a path as quickly as possible.
800 SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
802 else
805 * Wake up at ltime, and check for deadlocks as well if we will be
806 * waiting longer than deadlock_timeout
808 EnableTimeoutParams timeouts[2];
809 int cnt = 0;
811 if (ltime != 0)
813 timeouts[cnt].id = STANDBY_TIMEOUT;
814 timeouts[cnt].type = TMPARAM_AT;
815 timeouts[cnt].fin_time = ltime;
816 cnt++;
819 got_standby_deadlock_timeout = false;
820 timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
821 timeouts[cnt].type = TMPARAM_AFTER;
822 timeouts[cnt].delay_ms = DeadlockTimeout;
823 cnt++;
825 enable_timeouts(timeouts, cnt);
829 * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted
830 * by one of the timeouts established above.
832 * We assume that only UnpinBuffer() and the timeout requests established
833 * above can wake us up here. WakeupRecovery() called by walreceiver or
834 * SIGHUP signal handler, etc cannot do that because it uses the different
835 * latch from that ProcWaitForSignal() waits on.
837 ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
839 if (got_standby_delay_timeout)
840 SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
841 else if (got_standby_deadlock_timeout)
844 * Send out a request for hot-standby backends to check themselves for
845 * deadlocks.
847 * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
848 * to be signaled by UnpinBuffer() again and send a request for
849 * deadlocks check if deadlock_timeout happens. This causes the
850 * request to continue to be sent every deadlock_timeout until the
851 * buffer is unpinned or ltime is reached. This would increase the
852 * workload in the startup process and backends. In practice it may
853 * not be so harmful because the period that the buffer is kept pinned
854 * is basically no so long. But we should fix this?
856 SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
860 * Clear any timeout requests established above. We assume here that the
861 * Startup process doesn't have any other timeouts than what this function
862 * uses. If that stops being true, we could cancel the timeouts
863 * individually, but that'd be slower.
865 disable_all_timeouts(false);
866 got_standby_delay_timeout = false;
867 got_standby_deadlock_timeout = false;
870 static void
871 SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
873 Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN ||
874 reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
877 * We send signal to all backends to ask them if they are holding the
878 * buffer pin which is delaying the Startup process. We must not set the
879 * conflict flag yet, since most backends will be innocent. Let the
880 * SIGUSR1 handling in each backend decide their own fate.
882 CancelDBBackends(InvalidOid, reason, false);
886 * In Hot Standby perform early deadlock detection. We abort the lock
887 * wait if we are about to sleep while holding the buffer pin that Startup
888 * process is waiting for.
890 * Note: this code is pessimistic, because there is no way for it to
891 * determine whether an actual deadlock condition is present: the lock we
892 * need to wait for might be unrelated to any held by the Startup process.
893 * Sooner or later, this mechanism should get ripped out in favor of somehow
894 * accounting for buffer locks in DeadLockCheck(). However, errors here
895 * seem to be very low-probability in practice, so for now it's not worth
896 * the trouble.
898 void
899 CheckRecoveryConflictDeadlock(void)
901 Assert(!InRecovery); /* do not call in Startup process */
903 if (!HoldingBufferPinThatDelaysRecovery())
904 return;
907 * Error message should match ProcessInterrupts() but we avoid calling
908 * that because we aren't handling an interrupt at this point. Note that
909 * we only cancel the current transaction here, so if we are in a
910 * subtransaction and the pin is held by a parent, then the Startup
911 * process will continue to wait even though we have avoided deadlock.
913 ereport(ERROR,
914 (errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
915 errmsg("canceling statement due to conflict with recovery"),
916 errdetail("User transaction caused buffer deadlock with recovery.")));
920 /* --------------------------------
921 * timeout handler routines
922 * --------------------------------
926 * StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT is
927 * exceeded.
929 void
930 StandbyDeadLockHandler(void)
932 got_standby_deadlock_timeout = true;
936 * StandbyTimeoutHandler() will be called if STANDBY_TIMEOUT is exceeded.
938 void
939 StandbyTimeoutHandler(void)
941 got_standby_delay_timeout = true;
945 * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
947 void
948 StandbyLockTimeoutHandler(void)
950 got_standby_lock_timeout = true;
954 * -----------------------------------------------------
955 * Locking in Recovery Mode
956 * -----------------------------------------------------
958 * All locks are held by the Startup process using a single virtual
959 * transaction. This implementation is both simpler and in some senses,
960 * more correct. The locks held mean "some original transaction held
961 * this lock, so query access is not allowed at this time". So the Startup
962 * process is the proxy by which the original locks are implemented.
964 * We only keep track of AccessExclusiveLocks, which are only ever held by
965 * one transaction on one relation.
967 * We keep a table of known locks in the RecoveryLockHash hash table.
968 * The point of that table is to let us efficiently de-duplicate locks,
969 * which is important because checkpoints will re-report the same locks
970 * already held. There is also a RecoveryLockXidHash table with one entry
971 * per xid, which allows us to efficiently find all the locks held by a
972 * given original transaction.
974 * We use session locks rather than normal locks so we don't need
975 * ResourceOwners.
979 void
980 StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
982 RecoveryLockXidEntry *xidentry;
983 RecoveryLockEntry *lockentry;
984 xl_standby_lock key;
985 LOCKTAG locktag;
986 bool found;
988 /* Already processed? */
989 if (!TransactionIdIsValid(xid) ||
990 TransactionIdDidCommit(xid) ||
991 TransactionIdDidAbort(xid))
992 return;
994 elog(trace_recovery(DEBUG4),
995 "adding recovery lock: db %u rel %u", dbOid, relOid);
997 /* dbOid is InvalidOid when we are locking a shared relation. */
998 Assert(OidIsValid(relOid));
1000 /* Create a hash entry for this xid, if we don't have one already. */
1001 xidentry = hash_search(RecoveryLockXidHash, &xid, HASH_ENTER, &found);
1002 if (!found)
1004 Assert(xidentry->xid == xid); /* dynahash should have set this */
1005 xidentry->head = NULL;
1008 /* Create a hash entry for this lock, unless we have one already. */
1009 key.xid = xid;
1010 key.dbOid = dbOid;
1011 key.relOid = relOid;
1012 lockentry = hash_search(RecoveryLockHash, &key, HASH_ENTER, &found);
1013 if (!found)
1015 /* It's new, so link it into the XID's list ... */
1016 lockentry->next = xidentry->head;
1017 xidentry->head = lockentry;
1019 /* ... and acquire the lock locally. */
1020 SET_LOCKTAG_RELATION(locktag, dbOid, relOid);
1022 (void) LockAcquire(&locktag, AccessExclusiveLock, true, false);
1027 * Release all the locks associated with this RecoveryLockXidEntry.
1029 static void
1030 StandbyReleaseXidEntryLocks(RecoveryLockXidEntry *xidentry)
1032 RecoveryLockEntry *entry;
1033 RecoveryLockEntry *next;
1035 for (entry = xidentry->head; entry != NULL; entry = next)
1037 LOCKTAG locktag;
1039 elog(trace_recovery(DEBUG4),
1040 "releasing recovery lock: xid %u db %u rel %u",
1041 entry->key.xid, entry->key.dbOid, entry->key.relOid);
1042 /* Release the lock ... */
1043 SET_LOCKTAG_RELATION(locktag, entry->key.dbOid, entry->key.relOid);
1044 if (!LockRelease(&locktag, AccessExclusiveLock, true))
1046 elog(LOG,
1047 "RecoveryLockHash contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
1048 entry->key.xid, entry->key.dbOid, entry->key.relOid);
1049 Assert(false);
1051 /* ... and remove the per-lock hash entry */
1052 next = entry->next;
1053 hash_search(RecoveryLockHash, entry, HASH_REMOVE, NULL);
1056 xidentry->head = NULL; /* just for paranoia */
1060 * Release locks for specific XID, or all locks if it's InvalidXid.
1062 static void
1063 StandbyReleaseLocks(TransactionId xid)
1065 RecoveryLockXidEntry *entry;
1067 if (TransactionIdIsValid(xid))
1069 if ((entry = hash_search(RecoveryLockXidHash, &xid, HASH_FIND, NULL)))
1071 StandbyReleaseXidEntryLocks(entry);
1072 hash_search(RecoveryLockXidHash, entry, HASH_REMOVE, NULL);
1075 else
1076 StandbyReleaseAllLocks();
1080 * Release locks for a transaction tree, starting at xid down, from
1081 * RecoveryLockXidHash.
1083 * Called during WAL replay of COMMIT/ROLLBACK when in hot standby mode,
1084 * to remove any AccessExclusiveLocks requested by a transaction.
1086 void
1087 StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId *subxids)
1089 int i;
1091 StandbyReleaseLocks(xid);
1093 for (i = 0; i < nsubxids; i++)
1094 StandbyReleaseLocks(subxids[i]);
1098 * Called at end of recovery and when we see a shutdown checkpoint.
1100 void
1101 StandbyReleaseAllLocks(void)
1103 HASH_SEQ_STATUS status;
1104 RecoveryLockXidEntry *entry;
1106 elog(trace_recovery(DEBUG2), "release all standby locks");
1108 hash_seq_init(&status, RecoveryLockXidHash);
1109 while ((entry = hash_seq_search(&status)))
1111 StandbyReleaseXidEntryLocks(entry);
1112 hash_search(RecoveryLockXidHash, entry, HASH_REMOVE, NULL);
1117 * StandbyReleaseOldLocks
1118 * Release standby locks held by top-level XIDs that aren't running,
1119 * as long as they're not prepared transactions.
1121 void
1122 StandbyReleaseOldLocks(TransactionId oldxid)
1124 HASH_SEQ_STATUS status;
1125 RecoveryLockXidEntry *entry;
1127 hash_seq_init(&status, RecoveryLockXidHash);
1128 while ((entry = hash_seq_search(&status)))
1130 Assert(TransactionIdIsValid(entry->xid));
1132 /* Skip if prepared transaction. */
1133 if (StandbyTransactionIdIsPrepared(entry->xid))
1134 continue;
1136 /* Skip if >= oldxid. */
1137 if (!TransactionIdPrecedes(entry->xid, oldxid))
1138 continue;
1140 /* Remove all locks and hash table entry. */
1141 StandbyReleaseXidEntryLocks(entry);
1142 hash_search(RecoveryLockXidHash, entry, HASH_REMOVE, NULL);
1147 * --------------------------------------------------------------------
1148 * Recovery handling for Rmgr RM_STANDBY_ID
1150 * These record types will only be created if XLogStandbyInfoActive()
1151 * --------------------------------------------------------------------
1154 void
1155 standby_redo(XLogReaderState *record)
1157 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1159 /* Backup blocks are not used in standby records */
1160 Assert(!XLogRecHasAnyBlockRefs(record));
1162 /* Do nothing if we're not in hot standby mode */
1163 if (standbyState == STANDBY_DISABLED)
1164 return;
1166 if (info == XLOG_STANDBY_LOCK)
1168 xl_standby_locks *xlrec = (xl_standby_locks *) XLogRecGetData(record);
1169 int i;
1171 for (i = 0; i < xlrec->nlocks; i++)
1172 StandbyAcquireAccessExclusiveLock(xlrec->locks[i].xid,
1173 xlrec->locks[i].dbOid,
1174 xlrec->locks[i].relOid);
1176 else if (info == XLOG_RUNNING_XACTS)
1178 xl_running_xacts *xlrec = (xl_running_xacts *) XLogRecGetData(record);
1179 RunningTransactionsData running;
1181 running.xcnt = xlrec->xcnt;
1182 running.subxcnt = xlrec->subxcnt;
1183 running.subxid_overflow = xlrec->subxid_overflow;
1184 running.nextXid = xlrec->nextXid;
1185 running.latestCompletedXid = xlrec->latestCompletedXid;
1186 running.oldestRunningXid = xlrec->oldestRunningXid;
1187 running.xids = xlrec->xids;
1189 ProcArrayApplyRecoveryInfo(&running);
1191 else if (info == XLOG_INVALIDATIONS)
1193 xl_invalidations *xlrec = (xl_invalidations *) XLogRecGetData(record);
1195 ProcessCommittedInvalidationMessages(xlrec->msgs,
1196 xlrec->nmsgs,
1197 xlrec->relcacheInitFileInval,
1198 xlrec->dbId,
1199 xlrec->tsId);
1201 else
1202 elog(PANIC, "standby_redo: unknown op code %u", info);
1206 * Log details of the current snapshot to WAL. This allows the snapshot state
1207 * to be reconstructed on the standby and for logical decoding.
1209 * This is used for Hot Standby as follows:
1211 * We can move directly to STANDBY_SNAPSHOT_READY at startup if we
1212 * start from a shutdown checkpoint because we know nothing was running
1213 * at that time and our recovery snapshot is known empty. In the more
1214 * typical case of an online checkpoint we need to jump through a few
1215 * hoops to get a correct recovery snapshot and this requires a two or
1216 * sometimes a three stage process.
1218 * The initial snapshot must contain all running xids and all current
1219 * AccessExclusiveLocks at a point in time on the standby. Assembling
1220 * that information while the server is running requires many and
1221 * various LWLocks, so we choose to derive that information piece by
1222 * piece and then re-assemble that info on the standby. When that
1223 * information is fully assembled we move to STANDBY_SNAPSHOT_READY.
1225 * Since locking on the primary when we derive the information is not
1226 * strict, we note that there is a time window between the derivation and
1227 * writing to WAL of the derived information. That allows race conditions
1228 * that we must resolve, since xids and locks may enter or leave the
1229 * snapshot during that window. This creates the issue that an xid or
1230 * lock may start *after* the snapshot has been derived yet *before* the
1231 * snapshot is logged in the running xacts WAL record. We resolve this by
1232 * starting to accumulate changes at a point just prior to when we derive
1233 * the snapshot on the primary, then ignore duplicates when we later apply
1234 * the snapshot from the running xacts record. This is implemented during
1235 * CreateCheckPoint() where we use the logical checkpoint location as
1236 * our starting point and then write the running xacts record immediately
1237 * before writing the main checkpoint WAL record. Since we always start
1238 * up from a checkpoint and are immediately at our starting point, we
1239 * unconditionally move to STANDBY_INITIALIZED. After this point we
1240 * must do 4 things:
1241 * * move shared nextXid forwards as we see new xids
1242 * * extend the clog and subtrans with each new xid
1243 * * keep track of uncommitted known assigned xids
1244 * * keep track of uncommitted AccessExclusiveLocks
1246 * When we see a commit/abort we must remove known assigned xids and locks
1247 * from the completing transaction. Attempted removals that cannot locate
1248 * an entry are expected and must not cause an error when we are in state
1249 * STANDBY_INITIALIZED. This is implemented in StandbyReleaseLocks() and
1250 * KnownAssignedXidsRemove().
1252 * Later, when we apply the running xact data we must be careful to ignore
1253 * transactions already committed, since those commits raced ahead when
1254 * making WAL entries.
1256 * The loose timing also means that locks may be recorded that have a
1257 * zero xid, since xids are removed from procs before locks are removed.
1258 * So we must prune the lock list down to ensure we hold locks only for
1259 * currently running xids, performed by StandbyReleaseOldLocks().
1260 * Zero xids should no longer be possible, but we may be replaying WAL
1261 * from a time when they were possible.
1263 * For logical decoding only the running xacts information is needed;
1264 * there's no need to look at the locking information, but it's logged anyway,
1265 * as there's no independent knob to just enable logical decoding. For
1266 * details of how this is used, check snapbuild.c's introductory comment.
1269 * Returns the RecPtr of the last inserted record.
XLogRecPtr
LogStandbySnapshot(void)
{
	XLogRecPtr	recptr;
	RunningTransactions running;
	xl_standby_lock *locks;
	int			nlocks;

	/* Only called when standby info is being WAL-logged at all. */
	Assert(XLogStandbyInfoActive());

	/*
	 * Get details of any AccessExclusiveLocks being held at the moment.
	 */
	locks = GetRunningTransactionLocks(&nlocks);
	if (nlocks > 0)
		LogAccessExclusiveLocks(nlocks, locks);
	pfree(locks);

	/*
	 * Log details of all in-progress transactions. This should be the last
	 * record we write, because standby will open up when it sees this.
	 */
	running = GetRunningTransactionData();

	/*
	 * GetRunningTransactionData() acquired ProcArrayLock, we must release it.
	 * For Hot Standby this can be done before inserting the WAL record
	 * because ProcArrayApplyRecoveryInfo() rechecks the commit status using
	 * the clog. For logical decoding, though, the lock can't be released
	 * early because the clog might be "in the future" from the POV of the
	 * historic snapshot. This would allow for situations where we're waiting
	 * for the end of a transaction listed in the xl_running_xacts record
	 * which, according to the WAL, has committed before the xl_running_xacts
	 * record. Fortunately this routine isn't executed frequently, and it's
	 * only a shared lock.
	 */
	if (wal_level < WAL_LEVEL_LOGICAL)
		LWLockRelease(ProcArrayLock);

	recptr = LogCurrentRunningXacts(running);

	/* Release lock if we kept it longer ... */
	if (wal_level >= WAL_LEVEL_LOGICAL)
		LWLockRelease(ProcArrayLock);

	/* GetRunningTransactionData() acquired XidGenLock, we must release it */
	LWLockRelease(XidGenLock);

	/* LSN of the xl_running_xacts record, the last one inserted here. */
	return recptr;
}
1323 * Record an enhanced snapshot of running transactions into WAL.
1325 * The definitions of RunningTransactionsData and xl_running_xacts are
1326 * similar. We keep them separate because xl_running_xacts is a contiguous
1327 * chunk of memory and never exists fully until it is assembled in WAL.
1328 * The inserted records are marked as not being important for durability,
1329 * to avoid triggering superfluous checkpoint / archiving activity.
static XLogRecPtr
LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
{
	xl_running_xacts xlrec;
	XLogRecPtr	recptr;

	/* Copy counts and xid horizons into the fixed-size record header. */
	xlrec.xcnt = CurrRunningXacts->xcnt;
	xlrec.subxcnt = CurrRunningXacts->subxcnt;
	xlrec.subxid_overflow = CurrRunningXacts->subxid_overflow;
	xlrec.nextXid = CurrRunningXacts->nextXid;
	xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
	xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;

	/* Header */
	XLogBeginInsert();
	XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);	/* see function header comment */
	XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts);

	/*
	 * array of TransactionIds: top-level xids and subxids are registered as
	 * one contiguous array of (xcnt + subxcnt) entries.  NOTE(review): the
	 * xcnt > 0 guard assumes subxcnt is zero whenever xcnt is zero --
	 * presumably guaranteed by GetRunningTransactionData(); confirm there.
	 */
	if (xlrec.xcnt > 0)
		XLogRegisterData((char *) CurrRunningXacts->xids,
						 (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId));

	recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);

	/* Trace the snapshot; wording differs when the subxid array overflowed. */
	if (CurrRunningXacts->subxid_overflow)
		elog(trace_recovery(DEBUG2),
			 "snapshot of %u running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
			 CurrRunningXacts->xcnt,
			 LSN_FORMAT_ARGS(recptr),
			 CurrRunningXacts->oldestRunningXid,
			 CurrRunningXacts->latestCompletedXid,
			 CurrRunningXacts->nextXid);
	else
		elog(trace_recovery(DEBUG2),
			 "snapshot of %u+%u running transaction ids (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
			 CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt,
			 LSN_FORMAT_ARGS(recptr),
			 CurrRunningXacts->oldestRunningXid,
			 CurrRunningXacts->latestCompletedXid,
			 CurrRunningXacts->nextXid);

	/*
	 * Ensure running_xacts information is synced to disk not too far in the
	 * future. We don't want to stall anything though (i.e. use XLogFlush()),
	 * so we let the wal writer do it during normal operation.
	 * XLogSetAsyncXactLSN() conveniently will mark the LSN as to-be-synced
	 * and nudge the WALWriter into action if sleeping. Check
	 * XLogBackgroundFlush() for details why a record might not be flushed
	 * without it.
	 */
	XLogSetAsyncXactLSN(recptr);

	return recptr;
}
1388 * Wholesale logging of AccessExclusiveLocks. Other lock types need not be
1389 * logged, as described in backend/storage/lmgr/README.
1391 static void
1392 LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
1394 xl_standby_locks xlrec;
1396 xlrec.nlocks = nlocks;
1398 XLogBeginInsert();
1399 XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks));
1400 XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock));
1401 XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
1403 (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
1407 * Individual logging of AccessExclusiveLocks for use during LockAcquire()
1409 void
1410 LogAccessExclusiveLock(Oid dbOid, Oid relOid)
1412 xl_standby_lock xlrec;
1414 xlrec.xid = GetCurrentTransactionId();
1416 xlrec.dbOid = dbOid;
1417 xlrec.relOid = relOid;
1419 LogAccessExclusiveLocks(1, &xlrec);
1420 MyXactFlags |= XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK;
1424 * Prepare to log an AccessExclusiveLock, for use during LockAcquire()
void
LogAccessExclusiveLockPrepare(void)
{
	/*
	 * Ensure that a TransactionId has been assigned to this transaction, for
	 * two reasons, both related to lock release on the standby. First, we
	 * must assign an xid so that RecordTransactionCommit() and
	 * RecordTransactionAbort() do not optimise away the transaction
	 * completion record which recovery relies upon to release locks. It's a
	 * hack, but for a corner case not worth adding code for into the main
	 * commit path. Second, we must assign an xid before the lock is recorded
	 * in shared memory, otherwise a concurrently executing
	 * GetRunningTransactionLocks() might see a lock associated with an
	 * InvalidTransactionId which we later assert cannot happen.
	 *
	 * The return value is deliberately discarded; only the side effect of
	 * xid assignment is wanted here.
	 */
	(void) GetCurrentTransactionId();
}
1445 * Emit WAL for invalidations. This currently is only used for commits without
1446 * an xid but which contain invalidations.
1448 void
1449 LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs,
1450 bool relcacheInitFileInval)
1452 xl_invalidations xlrec;
1454 /* prepare record */
1455 memset(&xlrec, 0, sizeof(xlrec));
1456 xlrec.dbId = MyDatabaseId;
1457 xlrec.tsId = MyDatabaseTableSpace;
1458 xlrec.relcacheInitFileInval = relcacheInitFileInval;
1459 xlrec.nmsgs = nmsgs;
1461 /* perform insertion */
1462 XLogBeginInsert();
1463 XLogRegisterData((char *) (&xlrec), MinSizeOfInvalidations);
1464 XLogRegisterData((char *) msgs,
1465 nmsgs * sizeof(SharedInvalidationMessage));
1466 XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS);
1469 /* Return the description of recovery conflict */
1470 static const char *
1471 get_recovery_conflict_desc(ProcSignalReason reason)
1473 const char *reasonDesc = _("unknown reason");
1475 switch (reason)
1477 case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
1478 reasonDesc = _("recovery conflict on buffer pin");
1479 break;
1480 case PROCSIG_RECOVERY_CONFLICT_LOCK:
1481 reasonDesc = _("recovery conflict on lock");
1482 break;
1483 case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
1484 reasonDesc = _("recovery conflict on tablespace");
1485 break;
1486 case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
1487 reasonDesc = _("recovery conflict on snapshot");
1488 break;
1489 case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
1490 reasonDesc = _("recovery conflict on buffer deadlock");
1491 break;
1492 case PROCSIG_RECOVERY_CONFLICT_DATABASE:
1493 reasonDesc = _("recovery conflict on database");
1494 break;
1495 default:
1496 break;
1499 return reasonDesc;