/*-------------------------------------------------------------------------
 *
 * detoast.c
 *	  Retrieve compressed or external variable size attributes.
 *
 * Copyright (c) 2000-2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/access/common/detoast.c
 *
 *-------------------------------------------------------------------------
 */
16 #include "access/detoast.h"
17 #include "access/table.h"
18 #include "access/tableam.h"
19 #include "access/toast_internals.h"
20 #include "common/int.h"
21 #include "common/pg_lzcompress.h"
22 #include "utils/expandeddatum.h"
23 #include "utils/rel.h"
25 static struct varlena
*toast_fetch_datum(struct varlena
*attr
);
26 static struct varlena
*toast_fetch_datum_slice(struct varlena
*attr
,
29 static struct varlena
*toast_decompress_datum(struct varlena
*attr
);
30 static struct varlena
*toast_decompress_datum_slice(struct varlena
*attr
, int32 slicelength
);
33 * detoast_external_attr -
35 * Public entry point to get back a toasted value from
36 * external source (possibly still in compressed format).
38 * This will return a datum that contains all the data internally, ie, not
39 * relying on external storage or memory, but it can still be compressed or
40 * have a short header. Note some callers assume that if the input is an
41 * EXTERNAL datum, the result will be a pfree'able chunk.
45 detoast_external_attr(struct varlena
*attr
)
47 struct varlena
*result
;
49 if (VARATT_IS_EXTERNAL_ONDISK(attr
))
52 * This is an external stored plain value
54 result
= toast_fetch_datum(attr
);
56 else if (VARATT_IS_EXTERNAL_INDIRECT(attr
))
59 * This is an indirect pointer --- dereference it
61 struct varatt_indirect redirect
;
63 VARATT_EXTERNAL_GET_POINTER(redirect
, attr
);
64 attr
= (struct varlena
*) redirect
.pointer
;
66 /* nested indirect Datums aren't allowed */
67 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr
));
69 /* recurse if value is still external in some other way */
70 if (VARATT_IS_EXTERNAL(attr
))
71 return detoast_external_attr(attr
);
74 * Copy into the caller's memory context, in case caller tries to
77 result
= (struct varlena
*) palloc(VARSIZE_ANY(attr
));
78 memcpy(result
, attr
, VARSIZE_ANY(attr
));
80 else if (VARATT_IS_EXTERNAL_EXPANDED(attr
))
83 * This is an expanded-object pointer --- get flat format
85 ExpandedObjectHeader
*eoh
;
88 eoh
= DatumGetEOHP(PointerGetDatum(attr
));
89 resultsize
= EOH_get_flat_size(eoh
);
90 result
= (struct varlena
*) palloc(resultsize
);
91 EOH_flatten_into(eoh
, result
, resultsize
);
96 * This is a plain value inside of the main tuple - why am I called?
108 * Public entry point to get back a toasted value from compression
109 * or external storage. The result is always non-extended varlena form.
111 * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
112 * datum, the result will be a pfree'able chunk.
116 detoast_attr(struct varlena
*attr
)
118 if (VARATT_IS_EXTERNAL_ONDISK(attr
))
121 * This is an externally stored datum --- fetch it back from there
123 attr
= toast_fetch_datum(attr
);
124 /* If it's compressed, decompress it */
125 if (VARATT_IS_COMPRESSED(attr
))
127 struct varlena
*tmp
= attr
;
129 attr
= toast_decompress_datum(tmp
);
133 else if (VARATT_IS_EXTERNAL_INDIRECT(attr
))
136 * This is an indirect pointer --- dereference it
138 struct varatt_indirect redirect
;
140 VARATT_EXTERNAL_GET_POINTER(redirect
, attr
);
141 attr
= (struct varlena
*) redirect
.pointer
;
143 /* nested indirect Datums aren't allowed */
144 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr
));
146 /* recurse in case value is still extended in some other way */
147 attr
= detoast_attr(attr
);
149 /* if it isn't, we'd better copy it */
150 if (attr
== (struct varlena
*) redirect
.pointer
)
152 struct varlena
*result
;
154 result
= (struct varlena
*) palloc(VARSIZE_ANY(attr
));
155 memcpy(result
, attr
, VARSIZE_ANY(attr
));
159 else if (VARATT_IS_EXTERNAL_EXPANDED(attr
))
162 * This is an expanded-object pointer --- get flat format
164 attr
= detoast_external_attr(attr
);
165 /* flatteners are not allowed to produce compressed/short output */
166 Assert(!VARATT_IS_EXTENDED(attr
));
168 else if (VARATT_IS_COMPRESSED(attr
))
171 * This is a compressed value inside of the main tuple
173 attr
= toast_decompress_datum(attr
);
175 else if (VARATT_IS_SHORT(attr
))
178 * This is a short-header varlena --- convert to 4-byte header format
180 Size data_size
= VARSIZE_SHORT(attr
) - VARHDRSZ_SHORT
;
181 Size new_size
= data_size
+ VARHDRSZ
;
182 struct varlena
*new_attr
;
184 new_attr
= (struct varlena
*) palloc(new_size
);
185 SET_VARSIZE(new_attr
, new_size
);
186 memcpy(VARDATA(new_attr
), VARDATA_SHORT(attr
), data_size
);
195 * detoast_attr_slice -
197 * Public entry point to get back part of a toasted value
198 * from compression or external storage.
200 * sliceoffset is where to start (zero or more)
201 * If slicelength < 0, return everything beyond sliceoffset
205 detoast_attr_slice(struct varlena
*attr
,
206 int32 sliceoffset
, int32 slicelength
)
208 struct varlena
*preslice
;
209 struct varlena
*result
;
215 elog(ERROR
, "invalid sliceoffset: %d", sliceoffset
);
218 * Compute slicelimit = offset + length, or -1 if we must fetch all of the
219 * value. In case of integer overflow, we must fetch all.
223 else if (pg_add_s32_overflow(sliceoffset
, slicelength
, &slicelimit
))
224 slicelength
= slicelimit
= -1;
226 if (VARATT_IS_EXTERNAL_ONDISK(attr
))
228 struct varatt_external toast_pointer
;
230 VARATT_EXTERNAL_GET_POINTER(toast_pointer
, attr
);
232 /* fast path for non-compressed external datums */
233 if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer
))
234 return toast_fetch_datum_slice(attr
, sliceoffset
, slicelength
);
237 * For compressed values, we need to fetch enough slices to decompress
238 * at least the requested part (when a prefix is requested).
239 * Otherwise, just fetch all slices.
243 int32 max_size
= VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer
);
246 * Determine maximum amount of compressed data needed for a prefix
247 * of a given length (after decompression).
249 * At least for now, if it's LZ4 data, we'll have to fetch the
250 * whole thing, because there doesn't seem to be an API call to
251 * determine how much compressed data we need to be sure of being
252 * able to decompress the required slice.
254 if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer
) ==
255 TOAST_PGLZ_COMPRESSION_ID
)
256 max_size
= pglz_maximum_compressed_size(slicelimit
, max_size
);
259 * Fetch enough compressed slices (compressed marker will get set
262 preslice
= toast_fetch_datum_slice(attr
, 0, max_size
);
265 preslice
= toast_fetch_datum(attr
);
267 else if (VARATT_IS_EXTERNAL_INDIRECT(attr
))
269 struct varatt_indirect redirect
;
271 VARATT_EXTERNAL_GET_POINTER(redirect
, attr
);
273 /* nested indirect Datums aren't allowed */
274 Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect
.pointer
));
276 return detoast_attr_slice(redirect
.pointer
,
277 sliceoffset
, slicelength
);
279 else if (VARATT_IS_EXTERNAL_EXPANDED(attr
))
281 /* pass it off to detoast_external_attr to flatten */
282 preslice
= detoast_external_attr(attr
);
287 Assert(!VARATT_IS_EXTERNAL(preslice
));
289 if (VARATT_IS_COMPRESSED(preslice
))
291 struct varlena
*tmp
= preslice
;
293 /* Decompress enough to encompass the slice and the offset */
295 preslice
= toast_decompress_datum_slice(tmp
, slicelimit
);
297 preslice
= toast_decompress_datum(tmp
);
303 if (VARATT_IS_SHORT(preslice
))
305 attrdata
= VARDATA_SHORT(preslice
);
306 attrsize
= VARSIZE_SHORT(preslice
) - VARHDRSZ_SHORT
;
310 attrdata
= VARDATA(preslice
);
311 attrsize
= VARSIZE(preslice
) - VARHDRSZ
;
314 /* slicing of datum for compressed cases and plain value */
316 if (sliceoffset
>= attrsize
)
321 else if (slicelength
< 0 || slicelimit
> attrsize
)
322 slicelength
= attrsize
- sliceoffset
;
324 result
= (struct varlena
*) palloc(slicelength
+ VARHDRSZ
);
325 SET_VARSIZE(result
, slicelength
+ VARHDRSZ
);
327 memcpy(VARDATA(result
), attrdata
+ sliceoffset
, slicelength
);
329 if (preslice
!= attr
)
336 * toast_fetch_datum -
338 * Reconstruct an in memory Datum from the chunks saved
339 * in the toast relation
342 static struct varlena
*
343 toast_fetch_datum(struct varlena
*attr
)
346 struct varlena
*result
;
347 struct varatt_external toast_pointer
;
350 if (!VARATT_IS_EXTERNAL_ONDISK(attr
))
351 elog(ERROR
, "toast_fetch_datum shouldn't be called for non-ondisk datums");
353 /* Must copy to access aligned fields */
354 VARATT_EXTERNAL_GET_POINTER(toast_pointer
, attr
);
356 attrsize
= VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer
);
358 result
= (struct varlena
*) palloc(attrsize
+ VARHDRSZ
);
360 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer
))
361 SET_VARSIZE_COMPRESSED(result
, attrsize
+ VARHDRSZ
);
363 SET_VARSIZE(result
, attrsize
+ VARHDRSZ
);
366 return result
; /* Probably shouldn't happen, but just in
370 * Open the toast relation and its indexes
372 toastrel
= table_open(toast_pointer
.va_toastrelid
, AccessShareLock
);
374 /* Fetch all chunks */
375 table_relation_fetch_toast_slice(toastrel
, toast_pointer
.va_valueid
,
376 attrsize
, 0, attrsize
, result
);
378 /* Close toast table */
379 table_close(toastrel
, AccessShareLock
);
385 * toast_fetch_datum_slice -
387 * Reconstruct a segment of a Datum from the chunks saved
388 * in the toast relation
390 * Note that this function supports non-compressed external datums
391 * and compressed external datums (in which case the requested slice
392 * has to be a prefix, i.e. sliceoffset has to be 0).
395 static struct varlena
*
396 toast_fetch_datum_slice(struct varlena
*attr
, int32 sliceoffset
,
400 struct varlena
*result
;
401 struct varatt_external toast_pointer
;
404 if (!VARATT_IS_EXTERNAL_ONDISK(attr
))
405 elog(ERROR
, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
407 /* Must copy to access aligned fields */
408 VARATT_EXTERNAL_GET_POINTER(toast_pointer
, attr
);
411 * It's nonsense to fetch slices of a compressed datum unless when it's a
412 * prefix -- this isn't lo_* we can't return a compressed datum which is
413 * meaningful to toast later.
415 Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer
) || 0 == sliceoffset
);
417 attrsize
= VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer
);
419 if (sliceoffset
>= attrsize
)
426 * When fetching a prefix of a compressed external datum, account for the
427 * space required by va_tcinfo, which is stored at the beginning as an
430 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer
) && slicelength
> 0)
431 slicelength
= slicelength
+ sizeof(int32
);
434 * Adjust length request if needed. (Note: our sole caller,
435 * detoast_attr_slice, protects us against sliceoffset + slicelength
438 if (((sliceoffset
+ slicelength
) > attrsize
) || slicelength
< 0)
439 slicelength
= attrsize
- sliceoffset
;
441 result
= (struct varlena
*) palloc(slicelength
+ VARHDRSZ
);
443 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer
))
444 SET_VARSIZE_COMPRESSED(result
, slicelength
+ VARHDRSZ
);
446 SET_VARSIZE(result
, slicelength
+ VARHDRSZ
);
448 if (slicelength
== 0)
449 return result
; /* Can save a lot of work at this point! */
451 /* Open the toast relation */
452 toastrel
= table_open(toast_pointer
.va_toastrelid
, AccessShareLock
);
454 /* Fetch all chunks */
455 table_relation_fetch_toast_slice(toastrel
, toast_pointer
.va_valueid
,
456 attrsize
, sliceoffset
, slicelength
,
459 /* Close toast table */
460 table_close(toastrel
, AccessShareLock
);
466 * toast_decompress_datum -
468 * Decompress a compressed version of a varlena datum
470 static struct varlena
*
471 toast_decompress_datum(struct varlena
*attr
)
473 ToastCompressionId cmid
;
475 Assert(VARATT_IS_COMPRESSED(attr
));
478 * Fetch the compression method id stored in the compression header and
479 * decompress the data using the appropriate decompression routine.
481 cmid
= TOAST_COMPRESS_METHOD(attr
);
484 case TOAST_PGLZ_COMPRESSION_ID
:
485 return pglz_decompress_datum(attr
);
486 case TOAST_LZ4_COMPRESSION_ID
:
487 return lz4_decompress_datum(attr
);
489 elog(ERROR
, "invalid compression method id %d", cmid
);
490 return NULL
; /* keep compiler quiet */
496 * toast_decompress_datum_slice -
498 * Decompress the front of a compressed version of a varlena datum.
499 * offset handling happens in detoast_attr_slice.
500 * Here we just decompress a slice from the front.
502 static struct varlena
*
503 toast_decompress_datum_slice(struct varlena
*attr
, int32 slicelength
)
505 ToastCompressionId cmid
;
507 Assert(VARATT_IS_COMPRESSED(attr
));
510 * Some callers may pass a slicelength that's more than the actual
511 * decompressed size. If so, just decompress normally. This avoids
512 * possibly allocating a larger-than-necessary result object, and may be
513 * faster and/or more robust as well. Notably, some versions of liblz4
514 * have been seen to give wrong results if passed an output size that is
515 * more than the data's true decompressed size.
517 if ((uint32
) slicelength
>= TOAST_COMPRESS_EXTSIZE(attr
))
518 return toast_decompress_datum(attr
);
521 * Fetch the compression method id stored in the compression header and
522 * decompress the data slice using the appropriate decompression routine.
524 cmid
= TOAST_COMPRESS_METHOD(attr
);
527 case TOAST_PGLZ_COMPRESSION_ID
:
528 return pglz_decompress_datum_slice(attr
, slicelength
);
529 case TOAST_LZ4_COMPRESSION_ID
:
530 return lz4_decompress_datum_slice(attr
, slicelength
);
532 elog(ERROR
, "invalid compression method id %d", cmid
);
533 return NULL
; /* keep compiler quiet */
538 * toast_raw_datum_size -
540 * Return the raw (detoasted) size of a varlena datum
541 * (including the VARHDRSZ header)
545 toast_raw_datum_size(Datum value
)
547 struct varlena
*attr
= (struct varlena
*) DatumGetPointer(value
);
550 if (VARATT_IS_EXTERNAL_ONDISK(attr
))
552 /* va_rawsize is the size of the original datum -- including header */
553 struct varatt_external toast_pointer
;
555 VARATT_EXTERNAL_GET_POINTER(toast_pointer
, attr
);
556 result
= toast_pointer
.va_rawsize
;
558 else if (VARATT_IS_EXTERNAL_INDIRECT(attr
))
560 struct varatt_indirect toast_pointer
;
562 VARATT_EXTERNAL_GET_POINTER(toast_pointer
, attr
);
564 /* nested indirect Datums aren't allowed */
565 Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer
.pointer
));
567 return toast_raw_datum_size(PointerGetDatum(toast_pointer
.pointer
));
569 else if (VARATT_IS_EXTERNAL_EXPANDED(attr
))
571 result
= EOH_get_flat_size(DatumGetEOHP(value
));
573 else if (VARATT_IS_COMPRESSED(attr
))
575 /* here, va_rawsize is just the payload size */
576 result
= VARDATA_COMPRESSED_GET_EXTSIZE(attr
) + VARHDRSZ
;
578 else if (VARATT_IS_SHORT(attr
))
581 * we have to normalize the header length to VARHDRSZ or else the
582 * callers of this function will be confused.
584 result
= VARSIZE_SHORT(attr
) - VARHDRSZ_SHORT
+ VARHDRSZ
;
588 /* plain untoasted datum */
589 result
= VARSIZE(attr
);
597 * Return the physical storage size (possibly compressed) of a varlena datum
601 toast_datum_size(Datum value
)
603 struct varlena
*attr
= (struct varlena
*) DatumGetPointer(value
);
606 if (VARATT_IS_EXTERNAL_ONDISK(attr
))
609 * Attribute is stored externally - return the extsize whether
610 * compressed or not. We do not count the size of the toast pointer
613 struct varatt_external toast_pointer
;
615 VARATT_EXTERNAL_GET_POINTER(toast_pointer
, attr
);
616 result
= VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer
);
618 else if (VARATT_IS_EXTERNAL_INDIRECT(attr
))
620 struct varatt_indirect toast_pointer
;
622 VARATT_EXTERNAL_GET_POINTER(toast_pointer
, attr
);
624 /* nested indirect Datums aren't allowed */
625 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr
));
627 return toast_datum_size(PointerGetDatum(toast_pointer
.pointer
));
629 else if (VARATT_IS_EXTERNAL_EXPANDED(attr
))
631 result
= EOH_get_flat_size(DatumGetEOHP(value
));
633 else if (VARATT_IS_SHORT(attr
))
635 result
= VARSIZE_SHORT(attr
);
640 * Attribute is stored inline either compressed or not, just calculate
641 * the size of the datum in either case.
643 result
= VARSIZE(attr
);