/*-------------------------------------------------------------------------
 *
 * subtrans.c
 *		PostgreSQL subtransaction-log manager
 *
 * The pg_subtrans manager is a pg_xact-like manager that stores the parent
 * transaction Id for each transaction.  It is a fundamental part of the
 * nested transactions implementation.  A main transaction has a parent
 * of InvalidTransactionId, and each subtransaction has its immediate parent.
 * The tree can easily be walked from child to parent, but not in the
 * opposite direction.
 *
 * This code is based on xact.c, but the robustness requirements
 * are completely different from pg_xact, because we only need to remember
 * pg_subtrans information for currently-open transactions.  Thus, there is
 * no need to preserve data over a crash and restart.
 *
 * There are no XLOG interactions since we do not care about preserving
 * data across crashes.  During database startup, we simply force the
 * currently-active page of SUBTRANS to zeroes.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/backend/access/transam/subtrans.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/slru.h"
#include "access/subtrans.h"
#include "access/transam.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "utils/guc_hooks.h"
#include "utils/snapmgr.h"
/*
 * Defines for SubTrans page sizes.  A page is the same BLCKSZ as is used
 * everywhere else in Postgres.
 *
 * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
 * SubTrans page numbering also wraps around at
 * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE, and segment numbering at
 * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT.  We need take no
 * explicit notice of that fact in this module, except when comparing segment
 * and page numbers in TruncateSUBTRANS (see SubTransPagePrecedes) and zeroing
 * them in StartupSUBTRANS.
 */
/* We need four bytes per xact */
#define SUBTRANS_XACTS_PER_PAGE (BLCKSZ / sizeof(TransactionId))
57 * Although we return an int64 the actual value can't currently exceed
58 * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE.
61 TransactionIdToPage(TransactionId xid
)
63 return xid
/ (int64
) SUBTRANS_XACTS_PER_PAGE
;
/* Offset of a transaction ID's entry within its SUBTRANS page */
#define TransactionIdToEntry(xid) ((xid) % (TransactionId) SUBTRANS_XACTS_PER_PAGE)
70 * Link to shared-memory data structures for SUBTRANS control
72 static SlruCtlData SubTransCtlData
;
74 #define SubTransCtl (&SubTransCtlData)
77 static int ZeroSUBTRANSPage(int64 pageno
);
78 static bool SubTransPagePrecedes(int64 page1
, int64 page2
);
82 * Record the parent of a subtransaction in the subtrans log.
85 SubTransSetParent(TransactionId xid
, TransactionId parent
)
87 int64 pageno
= TransactionIdToPage(xid
);
88 int entryno
= TransactionIdToEntry(xid
);
93 Assert(TransactionIdIsValid(parent
));
94 Assert(TransactionIdFollows(xid
, parent
));
96 lock
= SimpleLruGetBankLock(SubTransCtl
, pageno
);
97 LWLockAcquire(lock
, LW_EXCLUSIVE
);
99 slotno
= SimpleLruReadPage(SubTransCtl
, pageno
, true, xid
);
100 ptr
= (TransactionId
*) SubTransCtl
->shared
->page_buffer
[slotno
];
104 * It's possible we'll try to set the parent xid multiple times but we
105 * shouldn't ever be changing the xid from one valid xid to another valid
106 * xid, which would corrupt the data structure.
110 Assert(*ptr
== InvalidTransactionId
);
112 SubTransCtl
->shared
->page_dirty
[slotno
] = true;
119 * Interrogate the parent of a transaction in the subtrans log.
122 SubTransGetParent(TransactionId xid
)
124 int64 pageno
= TransactionIdToPage(xid
);
125 int entryno
= TransactionIdToEntry(xid
);
128 TransactionId parent
;
130 /* Can't ask about stuff that might not be around anymore */
131 Assert(TransactionIdFollowsOrEquals(xid
, TransactionXmin
));
133 /* Bootstrap and frozen XIDs have no parent */
134 if (!TransactionIdIsNormal(xid
))
135 return InvalidTransactionId
;
137 /* lock is acquired by SimpleLruReadPage_ReadOnly */
139 slotno
= SimpleLruReadPage_ReadOnly(SubTransCtl
, pageno
, xid
);
140 ptr
= (TransactionId
*) SubTransCtl
->shared
->page_buffer
[slotno
];
145 LWLockRelease(SimpleLruGetBankLock(SubTransCtl
, pageno
));
151 * SubTransGetTopmostTransaction
153 * Returns the topmost transaction of the given transaction id.
155 * Because we cannot look back further than TransactionXmin, it is possible
156 * that this function will lie and return an intermediate subtransaction ID
157 * instead of the true topmost parent ID. This is OK, because in practice
158 * we only care about detecting whether the topmost parent is still running
159 * or is part of a current snapshot's list of still-running transactions.
160 * Therefore, any XID before TransactionXmin is as good as any other.
163 SubTransGetTopmostTransaction(TransactionId xid
)
165 TransactionId parentXid
= xid
,
168 /* Can't ask about stuff that might not be around anymore */
169 Assert(TransactionIdFollowsOrEquals(xid
, TransactionXmin
));
171 while (TransactionIdIsValid(parentXid
))
173 previousXid
= parentXid
;
174 if (TransactionIdPrecedes(parentXid
, TransactionXmin
))
176 parentXid
= SubTransGetParent(parentXid
);
179 * By convention the parent xid gets allocated first, so should always
180 * precede the child xid. Anything else points to a corrupted data
181 * structure that could lead to an infinite loop, so exit.
183 if (!TransactionIdPrecedes(parentXid
, previousXid
))
184 elog(ERROR
, "pg_subtrans contains invalid entry: xid %u points to parent xid %u",
185 previousXid
, parentXid
);
188 Assert(TransactionIdIsValid(previousXid
));
194 * Number of shared SUBTRANS buffers.
196 * If asked to autotune, use 2MB for every 1GB of shared buffers, up to 8MB.
197 * Otherwise just cap the configured amount to be between 16 and the maximum
201 SUBTRANSShmemBuffers(void)
203 /* auto-tune based on shared buffers */
204 if (subtransaction_buffers
== 0)
205 return SimpleLruAutotuneBuffers(512, 1024);
207 return Min(Max(16, subtransaction_buffers
), SLRU_MAX_ALLOWED_BUFFERS
);
211 * Initialization of shared memory for SUBTRANS
214 SUBTRANSShmemSize(void)
216 return SimpleLruShmemSize(SUBTRANSShmemBuffers(), 0);
220 SUBTRANSShmemInit(void)
222 /* If auto-tuning is requested, now is the time to do it */
223 if (subtransaction_buffers
== 0)
227 snprintf(buf
, sizeof(buf
), "%d", SUBTRANSShmemBuffers());
228 SetConfigOption("subtransaction_buffers", buf
, PGC_POSTMASTER
,
229 PGC_S_DYNAMIC_DEFAULT
);
232 * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
233 * However, if the DBA explicitly set subtransaction_buffers = 0 in
234 * the config file, then PGC_S_DYNAMIC_DEFAULT will fail to override
235 * that and we must force the matter with PGC_S_OVERRIDE.
237 if (subtransaction_buffers
== 0) /* failed to apply it? */
238 SetConfigOption("subtransaction_buffers", buf
, PGC_POSTMASTER
,
241 Assert(subtransaction_buffers
!= 0);
243 SubTransCtl
->PagePrecedes
= SubTransPagePrecedes
;
244 SimpleLruInit(SubTransCtl
, "subtransaction", SUBTRANSShmemBuffers(), 0,
245 "pg_subtrans", LWTRANCHE_SUBTRANS_BUFFER
,
246 LWTRANCHE_SUBTRANS_SLRU
, SYNC_HANDLER_NONE
, false);
247 SlruPagePrecedesUnitTests(SubTransCtl
, SUBTRANS_XACTS_PER_PAGE
);
251 * GUC check_hook for subtransaction_buffers
254 check_subtrans_buffers(int *newval
, void **extra
, GucSource source
)
256 return check_slru_buffers("subtransaction_buffers", newval
);
260 * This func must be called ONCE on system install. It creates
261 * the initial SUBTRANS segment. (The SUBTRANS directory is assumed to
262 * have been created by the initdb shell script, and SUBTRANSShmemInit
263 * must have been called already.)
265 * Note: it's not really necessary to create the initial segment now,
266 * since slru.c would create it on first write anyway. But we may as well
267 * do it to be sure the directory is set up correctly.
270 BootStrapSUBTRANS(void)
273 LWLock
*lock
= SimpleLruGetBankLock(SubTransCtl
, 0);
275 LWLockAcquire(lock
, LW_EXCLUSIVE
);
277 /* Create and zero the first page of the subtrans log */
278 slotno
= ZeroSUBTRANSPage(0);
280 /* Make sure it's written out */
281 SimpleLruWritePage(SubTransCtl
, slotno
);
282 Assert(!SubTransCtl
->shared
->page_dirty
[slotno
]);
288 * Initialize (or reinitialize) a page of SUBTRANS to zeroes.
290 * The page is not actually written, just set up in shared memory.
291 * The slot number of the new page is returned.
293 * Control lock must be held at entry, and will be held at exit.
296 ZeroSUBTRANSPage(int64 pageno
)
298 return SimpleLruZeroPage(SubTransCtl
, pageno
);
302 * This must be called ONCE during postmaster or standalone-backend startup,
303 * after StartupXLOG has initialized TransamVariables->nextXid.
305 * oldestActiveXID is the oldest XID of any prepared transaction, or nextXid
309 StartupSUBTRANS(TransactionId oldestActiveXID
)
311 FullTransactionId nextXid
;
314 LWLock
*prevlock
= NULL
;
318 * Since we don't expect pg_subtrans to be valid across crashes, we
319 * initialize the currently-active page(s) to zeroes during startup.
320 * Whenever we advance into a new page, ExtendSUBTRANS will likewise zero
321 * the new page without regard to whatever was previously on disk.
323 startPage
= TransactionIdToPage(oldestActiveXID
);
324 nextXid
= TransamVariables
->nextXid
;
325 endPage
= TransactionIdToPage(XidFromFullTransactionId(nextXid
));
329 lock
= SimpleLruGetBankLock(SubTransCtl
, startPage
);
330 if (prevlock
!= lock
)
333 LWLockRelease(prevlock
);
334 LWLockAcquire(lock
, LW_EXCLUSIVE
);
338 (void) ZeroSUBTRANSPage(startPage
);
339 if (startPage
== endPage
)
343 /* must account for wraparound */
344 if (startPage
> TransactionIdToPage(MaxTransactionId
))
352 * Perform a checkpoint --- either during shutdown, or on-the-fly
355 CheckPointSUBTRANS(void)
358 * Write dirty SUBTRANS pages to disk
360 * This is not actually necessary from a correctness point of view. We do
361 * it merely to improve the odds that writing of dirty pages is done by
362 * the checkpoint process and not by backends.
364 TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_START(true);
365 SimpleLruWriteAll(SubTransCtl
, true);
366 TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_DONE(true);
371 * Make sure that SUBTRANS has room for a newly-allocated XID.
373 * NB: this is called while holding XidGenLock. We want it to be very fast
374 * most of the time; even when it's not so fast, no actual I/O need happen
375 * unless we're forced to write out a dirty subtrans page to make room
379 ExtendSUBTRANS(TransactionId newestXact
)
385 * No work except at first XID of a page. But beware: just after
386 * wraparound, the first XID of page zero is FirstNormalTransactionId.
388 if (TransactionIdToEntry(newestXact
) != 0 &&
389 !TransactionIdEquals(newestXact
, FirstNormalTransactionId
))
392 pageno
= TransactionIdToPage(newestXact
);
394 lock
= SimpleLruGetBankLock(SubTransCtl
, pageno
);
395 LWLockAcquire(lock
, LW_EXCLUSIVE
);
398 ZeroSUBTRANSPage(pageno
);
405 * Remove all SUBTRANS segments before the one holding the passed transaction ID
407 * oldestXact is the oldest TransactionXmin of any running transaction. This
408 * is called only during checkpoint.
411 TruncateSUBTRANS(TransactionId oldestXact
)
416 * The cutoff point is the start of the segment containing oldestXact. We
417 * pass the *page* containing oldestXact to SimpleLruTruncate. We step
418 * back one transaction to avoid passing a cutoff page that hasn't been
419 * created yet in the rare case that oldestXact would be the first item on
420 * a page and oldestXact == next XID. In that case, if we didn't subtract
421 * one, we'd trigger SimpleLruTruncate's wraparound detection.
423 TransactionIdRetreat(oldestXact
);
424 cutoffPage
= TransactionIdToPage(oldestXact
);
426 SimpleLruTruncate(SubTransCtl
, cutoffPage
);
431 * Decide whether a SUBTRANS page number is "older" for truncation purposes.
432 * Analogous to CLOGPagePrecedes().
435 SubTransPagePrecedes(int64 page1
, int64 page2
)
440 xid1
= ((TransactionId
) page1
) * SUBTRANS_XACTS_PER_PAGE
;
441 xid1
+= FirstNormalTransactionId
+ 1;
442 xid2
= ((TransactionId
) page2
) * SUBTRANS_XACTS_PER_PAGE
;
443 xid2
+= FirstNormalTransactionId
+ 1;
445 return (TransactionIdPrecedes(xid1
, xid2
) &&
446 TransactionIdPrecedes(xid1
, xid2
+ SUBTRANS_XACTS_PER_PAGE
- 1));