/*-------------------------------------------------------------------------
 *
 * heap_surgery.c
 *	  Functions to perform surgery on a damaged heap table.
 *
 * Copyright (c) 2020-2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/pg_surgery/heap_surgery.c
 *
 *-------------------------------------------------------------------------
 */
15 #include "access/htup_details.h"
16 #include "access/relation.h"
17 #include "access/visibilitymap.h"
18 #include "access/xloginsert.h"
19 #include "catalog/pg_am_d.h"
20 #include "miscadmin.h"
21 #include "storage/bufmgr.h"
22 #include "utils/acl.h"
23 #include "utils/array.h"
24 #include "utils/rel.h"
/*
 * Options to forcefully change the state of a heap tuple.
 *
 * The value selects which of the two operations heap_force_common() applies
 * to each targeted item.
 */
typedef enum HeapTupleForceOption
{
	HEAP_FORCE_KILL,			/* mark the item's line pointer dead */
	HEAP_FORCE_FREEZE,			/* reset the tuple header to a frozen state */
} HeapTupleForceOption;
35 PG_FUNCTION_INFO_V1(heap_force_kill
);
36 PG_FUNCTION_INFO_V1(heap_force_freeze
);
38 static int32
tidcmp(const void *a
, const void *b
);
39 static Datum
heap_force_common(FunctionCallInfo fcinfo
,
40 HeapTupleForceOption heap_force_opt
);
41 static void sanity_check_tid_array(ArrayType
*ta
, int *ntids
);
42 static BlockNumber
find_tids_one_page(ItemPointer tids
, int ntids
,
43 OffsetNumber
*next_start_ptr
);
45 /*-------------------------------------------------------------------------
48 * Force kill the tuple(s) pointed to by the item pointer(s) stored in the
51 * Usage: SELECT heap_force_kill(regclass, tid[]);
52 *-------------------------------------------------------------------------
55 heap_force_kill(PG_FUNCTION_ARGS
)
57 PG_RETURN_DATUM(heap_force_common(fcinfo
, HEAP_FORCE_KILL
));
60 /*-------------------------------------------------------------------------
63 * Force freeze the tuple(s) pointed to by the item pointer(s) stored in the
66 * Usage: SELECT heap_force_freeze(regclass, tid[]);
67 *-------------------------------------------------------------------------
70 heap_force_freeze(PG_FUNCTION_ARGS
)
72 PG_RETURN_DATUM(heap_force_common(fcinfo
, HEAP_FORCE_FREEZE
));
75 /*-------------------------------------------------------------------------
78 * Common code for heap_force_kill and heap_force_freeze
79 *-------------------------------------------------------------------------
82 heap_force_common(FunctionCallInfo fcinfo
, HeapTupleForceOption heap_force_opt
)
84 Oid relid
= PG_GETARG_OID(0);
85 ArrayType
*ta
= PG_GETARG_ARRAYTYPE_P_COPY(1);
90 OffsetNumber curr_start_ptr
,
92 bool include_this_tid
[MaxHeapTuplesPerPage
];
94 if (RecoveryInProgress())
96 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE
),
97 errmsg("recovery is in progress"),
98 errhint("Heap surgery functions cannot be executed during recovery.")));
101 sanity_check_tid_array(ta
, &ntids
);
103 rel
= relation_open(relid
, RowExclusiveLock
);
106 * Check target relation.
108 if (!RELKIND_HAS_TABLE_AM(rel
->rd_rel
->relkind
))
110 (errcode(ERRCODE_WRONG_OBJECT_TYPE
),
111 errmsg("cannot operate on relation \"%s\"",
112 RelationGetRelationName(rel
)),
113 errdetail_relkind_not_supported(rel
->rd_rel
->relkind
)));
115 if (rel
->rd_rel
->relam
!= HEAP_TABLE_AM_OID
)
117 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
118 errmsg("only heap AM is supported")));
120 /* Must be owner of the table or superuser. */
121 if (!object_ownercheck(RelationRelationId
, RelationGetRelid(rel
), GetUserId()))
122 aclcheck_error(ACLCHECK_NOT_OWNER
,
123 get_relkind_objtype(rel
->rd_rel
->relkind
),
124 RelationGetRelationName(rel
));
126 tids
= ((ItemPointer
) ARR_DATA_PTR(ta
));
129 * If there is more than one TID in the array, sort them so that we can
130 * easily fetch all the TIDs belonging to one particular page from the
134 qsort(tids
, ntids
, sizeof(ItemPointerData
), tidcmp
);
136 curr_start_ptr
= next_start_ptr
= 0;
137 nblocks
= RelationGetNumberOfBlocks(rel
);
140 * Loop, performing the necessary actions for each block.
142 while (next_start_ptr
!= ntids
)
145 Buffer vmbuf
= InvalidBuffer
;
149 OffsetNumber maxoffset
;
151 bool did_modify_page
= false;
152 bool did_modify_vm
= false;
154 CHECK_FOR_INTERRUPTS();
157 * Find all the TIDs belonging to one particular page starting from
158 * next_start_ptr and process them one by one.
160 blkno
= find_tids_one_page(tids
, ntids
, &next_start_ptr
);
162 /* Check whether the block number is valid. */
163 if (blkno
>= nblocks
)
165 /* Update the current_start_ptr before moving to the next page. */
166 curr_start_ptr
= next_start_ptr
;
169 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
170 errmsg("skipping block %u for relation \"%s\" because the block number is out of range",
171 blkno
, RelationGetRelationName(rel
))));
175 buf
= ReadBuffer(rel
, blkno
);
176 LockBufferForCleanup(buf
);
178 page
= BufferGetPage(buf
);
180 maxoffset
= PageGetMaxOffsetNumber(page
);
183 * Figure out which TIDs we are going to process and which ones we are
186 memset(include_this_tid
, 0, sizeof(include_this_tid
));
187 for (i
= curr_start_ptr
; i
< next_start_ptr
; i
++)
189 OffsetNumber offno
= ItemPointerGetOffsetNumberNoCheck(&tids
[i
]);
192 /* Check whether the offset number is valid. */
193 if (offno
== InvalidOffsetNumber
|| offno
> maxoffset
)
196 errmsg("skipping tid (%u, %u) for relation \"%s\" because the item number is out of range",
197 blkno
, offno
, RelationGetRelationName(rel
)));
201 itemid
= PageGetItemId(page
, offno
);
203 /* Only accept an item ID that is used. */
204 if (ItemIdIsRedirected(itemid
))
207 errmsg("skipping tid (%u, %u) for relation \"%s\" because it redirects to item %u",
208 blkno
, offno
, RelationGetRelationName(rel
),
209 ItemIdGetRedirect(itemid
)));
212 else if (ItemIdIsDead(itemid
))
215 (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked dead",
216 blkno
, offno
, RelationGetRelationName(rel
))));
219 else if (!ItemIdIsUsed(itemid
))
222 (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked unused",
223 blkno
, offno
, RelationGetRelationName(rel
))));
227 /* Mark it for processing. */
228 Assert(offno
< MaxHeapTuplesPerPage
);
229 include_this_tid
[offno
] = true;
233 * Before entering the critical section, pin the visibility map page
234 * if it appears to be necessary.
236 if (heap_force_opt
== HEAP_FORCE_KILL
&& PageIsAllVisible(page
))
237 visibilitymap_pin(rel
, blkno
, &vmbuf
);
239 /* No ereport(ERROR) from here until all the changes are logged. */
240 START_CRIT_SECTION();
242 for (curoff
= FirstOffsetNumber
; curoff
<= maxoffset
;
243 curoff
= OffsetNumberNext(curoff
))
247 if (!include_this_tid
[curoff
])
250 itemid
= PageGetItemId(page
, curoff
);
251 Assert(ItemIdIsNormal(itemid
));
253 did_modify_page
= true;
255 if (heap_force_opt
== HEAP_FORCE_KILL
)
257 ItemIdSetDead(itemid
);
260 * If the page is marked all-visible, we must clear
261 * PD_ALL_VISIBLE flag on the page header and an all-visible
262 * bit on the visibility map corresponding to the page.
264 if (PageIsAllVisible(page
))
266 PageClearAllVisible(page
);
267 visibilitymap_clear(rel
, blkno
, vmbuf
,
268 VISIBILITYMAP_VALID_BITS
);
269 did_modify_vm
= true;
274 HeapTupleHeader htup
;
276 Assert(heap_force_opt
== HEAP_FORCE_FREEZE
);
278 htup
= (HeapTupleHeader
) PageGetItem(page
, itemid
);
281 * Reset all visibility-related fields of the tuple. This
282 * logic should mimic heap_execute_freeze_tuple(), but we
283 * choose to reset xmin and ctid just to be sure that no
284 * potentially-garbled data is left behind.
286 ItemPointerSet(&htup
->t_ctid
, blkno
, curoff
);
287 HeapTupleHeaderSetXmin(htup
, FrozenTransactionId
);
288 HeapTupleHeaderSetXmax(htup
, InvalidTransactionId
);
289 if (htup
->t_infomask
& HEAP_MOVED
)
291 if (htup
->t_infomask
& HEAP_MOVED_OFF
)
292 HeapTupleHeaderSetXvac(htup
, InvalidTransactionId
);
294 HeapTupleHeaderSetXvac(htup
, FrozenTransactionId
);
298 * Clear all the visibility-related bits of this tuple and
299 * mark it as frozen. Also, get rid of HOT_UPDATED and
302 htup
->t_infomask
&= ~HEAP_XACT_MASK
;
303 htup
->t_infomask
|= (HEAP_XMIN_FROZEN
| HEAP_XMAX_INVALID
);
304 htup
->t_infomask2
&= ~HEAP_HOT_UPDATED
;
305 htup
->t_infomask2
&= ~HEAP_KEYS_UPDATED
;
310 * If the page was modified, only then, we mark the buffer dirty or do
315 /* Mark buffer dirty before we write WAL. */
316 MarkBufferDirty(buf
);
319 if (RelationNeedsWAL(rel
))
320 log_newpage_buffer(buf
, true);
323 /* WAL log the VM page if it was modified. */
324 if (did_modify_vm
&& RelationNeedsWAL(rel
))
325 log_newpage_buffer(vmbuf
, false);
329 UnlockReleaseBuffer(buf
);
331 if (vmbuf
!= InvalidBuffer
)
332 ReleaseBuffer(vmbuf
);
334 /* Update the current_start_ptr before moving to the next page. */
335 curr_start_ptr
= next_start_ptr
;
338 relation_close(rel
, RowExclusiveLock
);
345 /*-------------------------------------------------------------------------
348 * Compare two item pointers, return -1, 0, or +1.
350 * See ItemPointerCompare for details.
351 * ------------------------------------------------------------------------
354 tidcmp(const void *a
, const void *b
)
356 ItemPointer iptr1
= ((const ItemPointer
) a
);
357 ItemPointer iptr2
= ((const ItemPointer
) b
);
359 return ItemPointerCompare(iptr1
, iptr2
);
362 /*-------------------------------------------------------------------------
363 * sanity_check_tid_array()
365 * Perform sanity checks on the given tid array, and set *ntids to the
366 * number of items in the array.
367 * ------------------------------------------------------------------------
370 sanity_check_tid_array(ArrayType
*ta
, int *ntids
)
372 if (ARR_HASNULL(ta
) && array_contains_nulls(ta
))
374 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED
),
375 errmsg("array must not contain nulls")));
377 if (ARR_NDIM(ta
) > 1)
379 (errcode(ERRCODE_DATA_EXCEPTION
),
380 errmsg("argument must be empty or one-dimensional array")));
382 *ntids
= ArrayGetNItems(ARR_NDIM(ta
), ARR_DIMS(ta
));
385 /*-------------------------------------------------------------------------
386 * find_tids_one_page()
388 * Find all the tids residing in the same page as tids[next_start_ptr], and
389 * update next_start_ptr so that it points to the first tid in the next page.
391 * NOTE: The input tids[] array must be sorted.
392 * ------------------------------------------------------------------------
395 find_tids_one_page(ItemPointer tids
, int ntids
, OffsetNumber
*next_start_ptr
)
398 BlockNumber prev_blkno
,
401 prev_blkno
= blkno
= InvalidBlockNumber
;
403 for (i
= *next_start_ptr
; i
< ntids
; i
++)
405 ItemPointerData tid
= tids
[i
];
407 blkno
= ItemPointerGetBlockNumberNoCheck(&tid
);
409 if (i
== *next_start_ptr
)
412 if (prev_blkno
!= blkno
)