Cache NO ACTION foreign keys separately from RESTRICT foreign keys
[pgsql.git] / src / backend / access / common / toast_internals.c
blob7d8be8346ce5238e356258994a1123ce9b3da1a7
/*-------------------------------------------------------------------------
 *
 * toast_internals.c
 *	  Functions for internal use by the TOAST system.
 *
 * Copyright (c) 2000-2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/access/common/toast_internals.c
 *
 *-------------------------------------------------------------------------
 */
14 #include "postgres.h"
16 #include "access/detoast.h"
17 #include "access/genam.h"
18 #include "access/heapam.h"
19 #include "access/heaptoast.h"
20 #include "access/table.h"
21 #include "access/toast_internals.h"
22 #include "access/xact.h"
23 #include "catalog/catalog.h"
24 #include "miscadmin.h"
25 #include "utils/fmgroids.h"
26 #include "utils/rel.h"
27 #include "utils/snapmgr.h"
29 static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
30 static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
32 /* ----------
33 * toast_compress_datum -
35 * Create a compressed version of a varlena datum
37 * If we fail (ie, compressed result is actually bigger than original)
38 * then return NULL. We must not use compressed data if it'd expand
39 * the tuple!
41 * We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without
42 * copying them. But we can't handle external or compressed datums.
43 * ----------
45 Datum
46 toast_compress_datum(Datum value, char cmethod)
48 struct varlena *tmp = NULL;
49 int32 valsize;
50 ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID;
52 Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
53 Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
55 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
57 /* If the compression method is not valid, use the current default */
58 if (!CompressionMethodIsValid(cmethod))
59 cmethod = default_toast_compression;
62 * Call appropriate compression routine for the compression method.
64 switch (cmethod)
66 case TOAST_PGLZ_COMPRESSION:
67 tmp = pglz_compress_datum((const struct varlena *) value);
68 cmid = TOAST_PGLZ_COMPRESSION_ID;
69 break;
70 case TOAST_LZ4_COMPRESSION:
71 tmp = lz4_compress_datum((const struct varlena *) value);
72 cmid = TOAST_LZ4_COMPRESSION_ID;
73 break;
74 default:
75 elog(ERROR, "invalid compression method %c", cmethod);
78 if (tmp == NULL)
79 return PointerGetDatum(NULL);
82 * We recheck the actual size even if compression reports success, because
83 * it might be satisfied with having saved as little as one byte in the
84 * compressed data --- which could turn into a net loss once you consider
85 * header and alignment padding. Worst case, the compressed format might
86 * require three padding bytes (plus header, which is included in
87 * VARSIZE(tmp)), whereas the uncompressed format would take only one
88 * header byte and no padding if the value is short enough. So we insist
89 * on a savings of more than 2 bytes to ensure we have a gain.
91 if (VARSIZE(tmp) < valsize - 2)
93 /* successful compression */
94 Assert(cmid != TOAST_INVALID_COMPRESSION_ID);
95 TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(tmp, valsize, cmid);
96 return PointerGetDatum(tmp);
98 else
100 /* incompressible data */
101 pfree(tmp);
102 return PointerGetDatum(NULL);
106 /* ----------
107 * toast_save_datum -
109 * Save one single datum into the secondary relation and return
110 * a Datum reference for it.
112 * rel: the main relation we're working with (not the toast rel!)
113 * value: datum to be pushed to toast storage
114 * oldexternal: if not NULL, toast pointer previously representing the datum
115 * options: options to be passed to heap_insert() for toast rows
116 * ----------
118 Datum
119 toast_save_datum(Relation rel, Datum value,
120 struct varlena *oldexternal, int options)
122 Relation toastrel;
123 Relation *toastidxs;
124 HeapTuple toasttup;
125 TupleDesc toasttupDesc;
126 Datum t_values[3];
127 bool t_isnull[3];
128 CommandId mycid = GetCurrentCommandId(true);
129 struct varlena *result;
130 struct varatt_external toast_pointer;
131 union
133 struct varlena hdr;
134 /* this is to make the union big enough for a chunk: */
135 char data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
136 /* ensure union is aligned well enough: */
137 int32 align_it;
138 } chunk_data;
139 int32 chunk_size;
140 int32 chunk_seq = 0;
141 char *data_p;
142 int32 data_todo;
143 Pointer dval = DatumGetPointer(value);
144 int num_indexes;
145 int validIndex;
147 Assert(!VARATT_IS_EXTERNAL(value));
150 * Open the toast relation and its indexes. We can use the index to check
151 * uniqueness of the OID we assign to the toasted item, even though it has
152 * additional columns besides OID.
154 toastrel = table_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
155 toasttupDesc = toastrel->rd_att;
157 /* Open all the toast indexes and look for the valid one */
158 validIndex = toast_open_indexes(toastrel,
159 RowExclusiveLock,
160 &toastidxs,
161 &num_indexes);
164 * Get the data pointer and length, and compute va_rawsize and va_extinfo.
166 * va_rawsize is the size of the equivalent fully uncompressed datum, so
167 * we have to adjust for short headers.
169 * va_extinfo stored the actual size of the data payload in the toast
170 * records and the compression method in first 2 bits if data is
171 * compressed.
173 if (VARATT_IS_SHORT(dval))
175 data_p = VARDATA_SHORT(dval);
176 data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
177 toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */
178 toast_pointer.va_extinfo = data_todo;
180 else if (VARATT_IS_COMPRESSED(dval))
182 data_p = VARDATA(dval);
183 data_todo = VARSIZE(dval) - VARHDRSZ;
184 /* rawsize in a compressed datum is just the size of the payload */
185 toast_pointer.va_rawsize = VARDATA_COMPRESSED_GET_EXTSIZE(dval) + VARHDRSZ;
187 /* set external size and compression method */
188 VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, data_todo,
189 VARDATA_COMPRESSED_GET_COMPRESS_METHOD(dval));
190 /* Assert that the numbers look like it's compressed */
191 Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
193 else
195 data_p = VARDATA(dval);
196 data_todo = VARSIZE(dval) - VARHDRSZ;
197 toast_pointer.va_rawsize = VARSIZE(dval);
198 toast_pointer.va_extinfo = data_todo;
202 * Insert the correct table OID into the result TOAST pointer.
204 * Normally this is the actual OID of the target toast table, but during
205 * table-rewriting operations such as CLUSTER, we have to insert the OID
206 * of the table's real permanent toast table instead. rd_toastoid is set
207 * if we have to substitute such an OID.
209 if (OidIsValid(rel->rd_toastoid))
210 toast_pointer.va_toastrelid = rel->rd_toastoid;
211 else
212 toast_pointer.va_toastrelid = RelationGetRelid(toastrel);
215 * Choose an OID to use as the value ID for this toast value.
217 * Normally we just choose an unused OID within the toast table. But
218 * during table-rewriting operations where we are preserving an existing
219 * toast table OID, we want to preserve toast value OIDs too. So, if
220 * rd_toastoid is set and we had a prior external value from that same
221 * toast table, re-use its value ID. If we didn't have a prior external
222 * value (which is a corner case, but possible if the table's attstorage
223 * options have been changed), we have to pick a value ID that doesn't
224 * conflict with either new or existing toast value OIDs.
226 if (!OidIsValid(rel->rd_toastoid))
228 /* normal case: just choose an unused OID */
229 toast_pointer.va_valueid =
230 GetNewOidWithIndex(toastrel,
231 RelationGetRelid(toastidxs[validIndex]),
232 (AttrNumber) 1);
234 else
236 /* rewrite case: check to see if value was in old toast table */
237 toast_pointer.va_valueid = InvalidOid;
238 if (oldexternal != NULL)
240 struct varatt_external old_toast_pointer;
242 Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
243 /* Must copy to access aligned fields */
244 VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
245 if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
247 /* This value came from the old toast table; reuse its OID */
248 toast_pointer.va_valueid = old_toast_pointer.va_valueid;
251 * There is a corner case here: the table rewrite might have
252 * to copy both live and recently-dead versions of a row, and
253 * those versions could easily reference the same toast value.
254 * When we copy the second or later version of such a row,
255 * reusing the OID will mean we select an OID that's already
256 * in the new toast table. Check for that, and if so, just
257 * fall through without writing the data again.
259 * While annoying and ugly-looking, this is a good thing
260 * because it ensures that we wind up with only one copy of
261 * the toast value when there is only one copy in the old
262 * toast table. Before we detected this case, we'd have made
263 * multiple copies, wasting space; and what's worse, the
264 * copies belonging to already-deleted heap tuples would not
265 * be reclaimed by VACUUM.
267 if (toastrel_valueid_exists(toastrel,
268 toast_pointer.va_valueid))
270 /* Match, so short-circuit the data storage loop below */
271 data_todo = 0;
275 if (toast_pointer.va_valueid == InvalidOid)
278 * new value; must choose an OID that doesn't conflict in either
279 * old or new toast table
283 toast_pointer.va_valueid =
284 GetNewOidWithIndex(toastrel,
285 RelationGetRelid(toastidxs[validIndex]),
286 (AttrNumber) 1);
287 } while (toastid_valueid_exists(rel->rd_toastoid,
288 toast_pointer.va_valueid));
293 * Initialize constant parts of the tuple data
295 t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid);
296 t_values[2] = PointerGetDatum(&chunk_data);
297 t_isnull[0] = false;
298 t_isnull[1] = false;
299 t_isnull[2] = false;
302 * Split up the item into chunks
304 while (data_todo > 0)
306 int i;
308 CHECK_FOR_INTERRUPTS();
311 * Calculate the size of this chunk
313 chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);
316 * Build a tuple and store it
318 t_values[1] = Int32GetDatum(chunk_seq++);
319 SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
320 memcpy(VARDATA(&chunk_data), data_p, chunk_size);
321 toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
323 heap_insert(toastrel, toasttup, mycid, options, NULL);
326 * Create the index entry. We cheat a little here by not using
327 * FormIndexDatum: this relies on the knowledge that the index columns
328 * are the same as the initial columns of the table for all the
329 * indexes. We also cheat by not providing an IndexInfo: this is okay
330 * for now because btree doesn't need one, but we might have to be
331 * more honest someday.
333 * Note also that there had better not be any user-created index on
334 * the TOAST table, since we don't bother to update anything else.
336 for (i = 0; i < num_indexes; i++)
338 /* Only index relations marked as ready can be updated */
339 if (toastidxs[i]->rd_index->indisready)
340 index_insert(toastidxs[i], t_values, t_isnull,
341 &(toasttup->t_self),
342 toastrel,
343 toastidxs[i]->rd_index->indisunique ?
344 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
345 false, NULL);
349 * Free memory
351 heap_freetuple(toasttup);
354 * Move on to next chunk
356 data_todo -= chunk_size;
357 data_p += chunk_size;
361 * Done - close toast relation and its indexes but keep the lock until
362 * commit, so as a concurrent reindex done directly on the toast relation
363 * would be able to wait for this transaction.
365 toast_close_indexes(toastidxs, num_indexes, NoLock);
366 table_close(toastrel, NoLock);
369 * Create the TOAST pointer value that we'll return
371 result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
372 SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
373 memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
375 return PointerGetDatum(result);
378 /* ----------
379 * toast_delete_datum -
381 * Delete a single external stored value.
382 * ----------
384 void
385 toast_delete_datum(Relation rel, Datum value, bool is_speculative)
387 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
388 struct varatt_external toast_pointer;
389 Relation toastrel;
390 Relation *toastidxs;
391 ScanKeyData toastkey;
392 SysScanDesc toastscan;
393 HeapTuple toasttup;
394 int num_indexes;
395 int validIndex;
397 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
398 return;
400 /* Must copy to access aligned fields */
401 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
404 * Open the toast relation and its indexes
406 toastrel = table_open(toast_pointer.va_toastrelid, RowExclusiveLock);
408 /* Fetch valid relation used for process */
409 validIndex = toast_open_indexes(toastrel,
410 RowExclusiveLock,
411 &toastidxs,
412 &num_indexes);
415 * Setup a scan key to find chunks with matching va_valueid
417 ScanKeyInit(&toastkey,
418 (AttrNumber) 1,
419 BTEqualStrategyNumber, F_OIDEQ,
420 ObjectIdGetDatum(toast_pointer.va_valueid));
423 * Find all the chunks. (We don't actually care whether we see them in
424 * sequence or not, but since we've already locked the index we might as
425 * well use systable_beginscan_ordered.)
427 toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
428 get_toast_snapshot(), 1, &toastkey);
429 while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
432 * Have a chunk, delete it
434 if (is_speculative)
435 heap_abort_speculative(toastrel, &toasttup->t_self);
436 else
437 simple_heap_delete(toastrel, &toasttup->t_self);
441 * End scan and close relations but keep the lock until commit, so as a
442 * concurrent reindex done directly on the toast relation would be able to
443 * wait for this transaction.
445 systable_endscan_ordered(toastscan);
446 toast_close_indexes(toastidxs, num_indexes, NoLock);
447 table_close(toastrel, NoLock);
450 /* ----------
451 * toastrel_valueid_exists -
453 * Test whether a toast value with the given ID exists in the toast relation.
454 * For safety, we consider a value to exist if there are either live or dead
455 * toast rows with that ID; see notes for GetNewOidWithIndex().
456 * ----------
458 static bool
459 toastrel_valueid_exists(Relation toastrel, Oid valueid)
461 bool result = false;
462 ScanKeyData toastkey;
463 SysScanDesc toastscan;
464 int num_indexes;
465 int validIndex;
466 Relation *toastidxs;
468 /* Fetch a valid index relation */
469 validIndex = toast_open_indexes(toastrel,
470 RowExclusiveLock,
471 &toastidxs,
472 &num_indexes);
475 * Setup a scan key to find chunks with matching va_valueid
477 ScanKeyInit(&toastkey,
478 (AttrNumber) 1,
479 BTEqualStrategyNumber, F_OIDEQ,
480 ObjectIdGetDatum(valueid));
483 * Is there any such chunk?
485 toastscan = systable_beginscan(toastrel,
486 RelationGetRelid(toastidxs[validIndex]),
487 true, SnapshotAny, 1, &toastkey);
489 if (systable_getnext(toastscan) != NULL)
490 result = true;
492 systable_endscan(toastscan);
494 /* Clean up */
495 toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
497 return result;
500 /* ----------
501 * toastid_valueid_exists -
503 * As above, but work from toast rel's OID not an open relation
504 * ----------
506 static bool
507 toastid_valueid_exists(Oid toastrelid, Oid valueid)
509 bool result;
510 Relation toastrel;
512 toastrel = table_open(toastrelid, AccessShareLock);
514 result = toastrel_valueid_exists(toastrel, valueid);
516 table_close(toastrel, AccessShareLock);
518 return result;
521 /* ----------
522 * toast_get_valid_index
524 * Get OID of valid index associated to given toast relation. A toast
525 * relation can have only one valid index at the same time.
528 toast_get_valid_index(Oid toastoid, LOCKMODE lock)
530 int num_indexes;
531 int validIndex;
532 Oid validIndexOid;
533 Relation *toastidxs;
534 Relation toastrel;
536 /* Open the toast relation */
537 toastrel = table_open(toastoid, lock);
539 /* Look for the valid index of the toast relation */
540 validIndex = toast_open_indexes(toastrel,
541 lock,
542 &toastidxs,
543 &num_indexes);
544 validIndexOid = RelationGetRelid(toastidxs[validIndex]);
546 /* Close the toast relation and all its indexes */
547 toast_close_indexes(toastidxs, num_indexes, NoLock);
548 table_close(toastrel, NoLock);
550 return validIndexOid;
553 /* ----------
554 * toast_open_indexes
556 * Get an array of the indexes associated to the given toast relation
557 * and return as well the position of the valid index used by the toast
558 * relation in this array. It is the responsibility of the caller of this
559 * function to close the indexes as well as free them.
562 toast_open_indexes(Relation toastrel,
563 LOCKMODE lock,
564 Relation **toastidxs,
565 int *num_indexes)
567 int i = 0;
568 int res = 0;
569 bool found = false;
570 List *indexlist;
571 ListCell *lc;
573 /* Get index list of the toast relation */
574 indexlist = RelationGetIndexList(toastrel);
575 Assert(indexlist != NIL);
577 *num_indexes = list_length(indexlist);
579 /* Open all the index relations */
580 *toastidxs = (Relation *) palloc(*num_indexes * sizeof(Relation));
581 foreach(lc, indexlist)
582 (*toastidxs)[i++] = index_open(lfirst_oid(lc), lock);
584 /* Fetch the first valid index in list */
585 for (i = 0; i < *num_indexes; i++)
587 Relation toastidx = (*toastidxs)[i];
589 if (toastidx->rd_index->indisvalid)
591 res = i;
592 found = true;
593 break;
598 * Free index list, not necessary anymore as relations are opened and a
599 * valid index has been found.
601 list_free(indexlist);
604 * The toast relation should have one valid index, so something is going
605 * wrong if there is nothing.
607 if (!found)
608 elog(ERROR, "no valid index found for toast relation with Oid %u",
609 RelationGetRelid(toastrel));
611 return res;
614 /* ----------
615 * toast_close_indexes
617 * Close an array of indexes for a toast relation and free it. This should
618 * be called for a set of indexes opened previously with toast_open_indexes.
620 void
621 toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
623 int i;
625 /* Close relations and clean up things */
626 for (i = 0; i < num_indexes; i++)
627 index_close(toastidxs[i], lock);
628 pfree(toastidxs);
631 /* ----------
632 * get_toast_snapshot
634 * Return the TOAST snapshot. Detoasting *must* happen in the same
635 * transaction that originally fetched the toast pointer.
637 Snapshot
638 get_toast_snapshot(void)
641 * We cannot directly check that detoasting happens in the same
642 * transaction that originally fetched the toast pointer, but at least
643 * check that the session has some active snapshots. It might not if, for
644 * example, a procedure fetches a toasted value into a local variable,
645 * commits, and then tries to detoast the value. Such coding is unsafe,
646 * because once we commit there is nothing to prevent the toast data from
647 * being deleted. (This is not very much protection, because in many
648 * scenarios the procedure would have already created a new transaction
649 * snapshot, preventing us from detecting the problem. But it's better
650 * than nothing.)
652 if (!HaveRegisteredOrActiveSnapshot())
653 elog(ERROR, "cannot fetch toast data without an active snapshot");
655 return &SnapshotToastData;