Force a checkpoint in CREATE DATABASE before starting to copy the files,
[PostgreSQL.git] / src / backend / access / heap / tuptoaster.c
blob19159848a3e1b496a90f55271bc2f0eb4e0959d6
/*-------------------------------------------------------------------------
 *
 * tuptoaster.c
 *	  Support routines for external and compressed storage of
 *	  variable size attributes.
 *
 * Copyright (c) 2000-2008, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *
 * INTERFACE ROUTINES
 *		toast_insert_or_update -
 *			Try to make a given tuple fit into one page by compressing
 *			or moving off attributes
 *
 *		toast_delete -
 *			Reclaim toast storage when a tuple is deleted
 *
 *		heap_tuple_untoast_attr -
 *			Fetch back a given value from the "secondary" relation
 *
 *-------------------------------------------------------------------------
 */

28 #include "postgres.h"
30 #include <unistd.h>
31 #include <fcntl.h>
33 #include "access/genam.h"
34 #include "access/heapam.h"
35 #include "access/tuptoaster.h"
36 #include "access/xact.h"
37 #include "catalog/catalog.h"
38 #include "utils/fmgroids.h"
39 #include "utils/pg_lzcompress.h"
40 #include "utils/rel.h"
41 #include "utils/typcache.h"
42 #include "utils/tqual.h"
45 #undef TOAST_DEBUG
47 /* Size of an EXTERNAL datum that contains a standard TOAST pointer */
48 #define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_external))
51 * Testing whether an externally-stored value is compressed now requires
52 * comparing extsize (the actual length of the external data) to rawsize
53 * (the original uncompressed datum's size). The latter includes VARHDRSZ
54 * overhead, the former doesn't. We never use compression unless it actually
55 * saves space, so we expect either equality or less-than.
57 #define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \
58 ((toast_pointer).va_extsize < (toast_pointer).va_rawsize - VARHDRSZ)
61 * Macro to fetch the possibly-unaligned contents of an EXTERNAL datum
62 * into a local "struct varatt_external" toast pointer. This should be
63 * just a memcpy, but some versions of gcc seem to produce broken code
64 * that assumes the datum contents are aligned. Introducing an explicit
65 * intermediate "varattrib_1b_e *" variable seems to fix it.
67 #define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr) \
68 do { \
69 varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \
70 Assert(VARATT_IS_EXTERNAL(attre)); \
71 Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \
72 memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \
73 } while (0)
76 static void toast_delete_datum(Relation rel, Datum value);
77 static Datum toast_save_datum(Relation rel, Datum value,
78 bool use_wal, bool use_fsm);
79 static struct varlena *toast_fetch_datum(struct varlena * attr);
80 static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
81 int32 sliceoffset, int32 length);
/* ----------
 * heap_tuple_fetch_attr -
 *
 *	Public entry point to get back a toasted value from
 *	external storage (possibly still in compressed format).
 *
 * This will return a datum that contains all the data internally, ie, not
 * relying on external storage, but it can still be compressed or have a short
 * header.
 *
 * If attr is not an external datum, the input pointer itself is returned
 * (no copy is made).
 * ----------
 */
struct varlena *
heap_tuple_fetch_attr(struct varlena * attr)
{
	/*
	 * A value that already lives inside the main tuple needs no work;
	 * hand the caller's pointer straight back.
	 */
	if (!VARATT_IS_EXTERNAL(attr))
		return attr;

	/* Externally stored value: pull it back from the toast relation. */
	return toast_fetch_datum(attr);
}


119 /* ----------
120 * heap_tuple_untoast_attr -
122 * Public entry point to get back a toasted value from compression
123 * or external storage.
124 * ----------
126 struct varlena *
127 heap_tuple_untoast_attr(struct varlena * attr)
129 if (VARATT_IS_EXTERNAL(attr))
132 * This is an externally stored datum --- fetch it back from there
134 attr = toast_fetch_datum(attr);
135 /* If it's compressed, decompress it */
136 if (VARATT_IS_COMPRESSED(attr))
138 PGLZ_Header *tmp = (PGLZ_Header *) attr;
140 attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
141 SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
142 pglz_decompress(tmp, VARDATA(attr));
143 pfree(tmp);
146 else if (VARATT_IS_COMPRESSED(attr))
149 * This is a compressed value inside of the main tuple
151 PGLZ_Header *tmp = (PGLZ_Header *) attr;
153 attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
154 SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
155 pglz_decompress(tmp, VARDATA(attr));
157 else if (VARATT_IS_SHORT(attr))
160 * This is a short-header varlena --- convert to 4-byte header format
162 Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
163 Size new_size = data_size + VARHDRSZ;
164 struct varlena *new_attr;
166 new_attr = (struct varlena *) palloc(new_size);
167 SET_VARSIZE(new_attr, new_size);
168 memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
169 attr = new_attr;
172 return attr;
176 /* ----------
177 * heap_tuple_untoast_attr_slice -
179 * Public entry point to get back part of a toasted value
180 * from compression or external storage.
181 * ----------
183 struct varlena *
184 heap_tuple_untoast_attr_slice(struct varlena * attr,
185 int32 sliceoffset, int32 slicelength)
187 struct varlena *preslice;
188 struct varlena *result;
189 char *attrdata;
190 int32 attrsize;
192 if (VARATT_IS_EXTERNAL(attr))
194 struct varatt_external toast_pointer;
196 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
198 /* fast path for non-compressed external datums */
199 if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
200 return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
202 /* fetch it back (compressed marker will get set automatically) */
203 preslice = toast_fetch_datum(attr);
205 else
206 preslice = attr;
208 if (VARATT_IS_COMPRESSED(preslice))
210 PGLZ_Header *tmp = (PGLZ_Header *) preslice;
211 Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ;
213 preslice = (struct varlena *) palloc(size);
214 SET_VARSIZE(preslice, size);
215 pglz_decompress(tmp, VARDATA(preslice));
217 if (tmp != (PGLZ_Header *) attr)
218 pfree(tmp);
221 if (VARATT_IS_SHORT(preslice))
223 attrdata = VARDATA_SHORT(preslice);
224 attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
226 else
228 attrdata = VARDATA(preslice);
229 attrsize = VARSIZE(preslice) - VARHDRSZ;
232 /* slicing of datum for compressed cases and plain value */
234 if (sliceoffset >= attrsize)
236 sliceoffset = 0;
237 slicelength = 0;
240 if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
241 slicelength = attrsize - sliceoffset;
243 result = (struct varlena *) palloc(slicelength + VARHDRSZ);
244 SET_VARSIZE(result, slicelength + VARHDRSZ);
246 memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
248 if (preslice != attr)
249 pfree(preslice);
251 return result;
255 /* ----------
256 * toast_raw_datum_size -
258 * Return the raw (detoasted) size of a varlena datum
259 * (including the VARHDRSZ header)
260 * ----------
262 Size
263 toast_raw_datum_size(Datum value)
265 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
266 Size result;
268 if (VARATT_IS_EXTERNAL(attr))
270 /* va_rawsize is the size of the original datum -- including header */
271 struct varatt_external toast_pointer;
273 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
274 result = toast_pointer.va_rawsize;
276 else if (VARATT_IS_COMPRESSED(attr))
278 /* here, va_rawsize is just the payload size */
279 result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
281 else if (VARATT_IS_SHORT(attr))
284 * we have to normalize the header length to VARHDRSZ or else the
285 * callers of this function will be confused.
287 result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
289 else
291 /* plain untoasted datum */
292 result = VARSIZE(attr);
294 return result;
297 /* ----------
298 * toast_datum_size
300 * Return the physical storage size (possibly compressed) of a varlena datum
301 * ----------
303 Size
304 toast_datum_size(Datum value)
306 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
307 Size result;
309 if (VARATT_IS_EXTERNAL(attr))
312 * Attribute is stored externally - return the extsize whether
313 * compressed or not. We do not count the size of the toast pointer
314 * ... should we?
316 struct varatt_external toast_pointer;
318 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
319 result = toast_pointer.va_extsize;
321 else if (VARATT_IS_SHORT(attr))
323 result = VARSIZE_SHORT(attr);
325 else
328 * Attribute is stored inline either compressed or not, just calculate
329 * the size of the datum in either case.
331 result = VARSIZE(attr);
333 return result;
337 /* ----------
338 * toast_delete -
340 * Cascaded delete toast-entries on DELETE
341 * ----------
343 void
344 toast_delete(Relation rel, HeapTuple oldtup)
346 TupleDesc tupleDesc;
347 Form_pg_attribute *att;
348 int numAttrs;
349 int i;
350 Datum toast_values[MaxHeapAttributeNumber];
351 bool toast_isnull[MaxHeapAttributeNumber];
354 * We should only ever be called for tuples of plain relations ---
355 * recursing on a toast rel is bad news.
357 Assert(rel->rd_rel->relkind == RELKIND_RELATION);
360 * Get the tuple descriptor and break down the tuple into fields.
362 * NOTE: it's debatable whether to use heap_deformtuple() here or just
363 * heap_getattr() only the varlena columns. The latter could win if there
364 * are few varlena columns and many non-varlena ones. However,
365 * heap_deformtuple costs only O(N) while the heap_getattr way would cost
366 * O(N^2) if there are many varlena columns, so it seems better to err on
367 * the side of linear cost. (We won't even be here unless there's at
368 * least one varlena column, by the way.)
370 tupleDesc = rel->rd_att;
371 att = tupleDesc->attrs;
372 numAttrs = tupleDesc->natts;
374 Assert(numAttrs <= MaxHeapAttributeNumber);
375 heap_deform_tuple(oldtup, tupleDesc, toast_values, toast_isnull);
378 * Check for external stored attributes and delete them from the secondary
379 * relation.
381 for (i = 0; i < numAttrs; i++)
383 if (att[i]->attlen == -1)
385 Datum value = toast_values[i];
387 if (!toast_isnull[i] && VARATT_IS_EXTERNAL(PointerGetDatum(value)))
388 toast_delete_datum(rel, value);
394 /* ----------
395 * toast_insert_or_update -
397 * Delete no-longer-used toast-entries and create new ones to
398 * make the new tuple fit on INSERT or UPDATE
400 * Inputs:
401 * newtup: the candidate new tuple to be inserted
402 * oldtup: the old row version for UPDATE, or NULL for INSERT
403 * use_wal, use_fsm: flags to be passed to heap_insert() for toast rows
404 * Result:
405 * either newtup if no toasting is needed, or a palloc'd modified tuple
406 * that is what should actually get stored
408 * NOTE: neither newtup nor oldtup will be modified. This is a change
409 * from the pre-8.1 API of this routine.
410 * ----------
412 HeapTuple
413 toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
414 bool use_wal, bool use_fsm)
416 HeapTuple result_tuple;
417 TupleDesc tupleDesc;
418 Form_pg_attribute *att;
419 int numAttrs;
420 int i;
422 bool need_change = false;
423 bool need_free = false;
424 bool need_delold = false;
425 bool has_nulls = false;
427 Size maxDataLen;
428 Size hoff;
430 char toast_action[MaxHeapAttributeNumber];
431 bool toast_isnull[MaxHeapAttributeNumber];
432 bool toast_oldisnull[MaxHeapAttributeNumber];
433 Datum toast_values[MaxHeapAttributeNumber];
434 Datum toast_oldvalues[MaxHeapAttributeNumber];
435 int32 toast_sizes[MaxHeapAttributeNumber];
436 bool toast_free[MaxHeapAttributeNumber];
437 bool toast_delold[MaxHeapAttributeNumber];
440 * We should only ever be called for tuples of plain relations ---
441 * recursing on a toast rel is bad news.
443 Assert(rel->rd_rel->relkind == RELKIND_RELATION);
446 * Get the tuple descriptor and break down the tuple(s) into fields.
448 tupleDesc = rel->rd_att;
449 att = tupleDesc->attrs;
450 numAttrs = tupleDesc->natts;
452 Assert(numAttrs <= MaxHeapAttributeNumber);
453 heap_deform_tuple(newtup, tupleDesc, toast_values, toast_isnull);
454 if (oldtup != NULL)
455 heap_deform_tuple(oldtup, tupleDesc, toast_oldvalues, toast_oldisnull);
457 /* ----------
458 * Then collect information about the values given
460 * NOTE: toast_action[i] can have these values:
461 * ' ' default handling
462 * 'p' already processed --- don't touch it
463 * 'x' incompressible, but OK to move off
465 * NOTE: toast_sizes[i] is only made valid for varlena attributes with
466 * toast_action[i] different from 'p'.
467 * ----------
469 memset(toast_action, ' ', numAttrs * sizeof(char));
470 memset(toast_free, 0, numAttrs * sizeof(bool));
471 memset(toast_delold, 0, numAttrs * sizeof(bool));
473 for (i = 0; i < numAttrs; i++)
475 struct varlena *old_value;
476 struct varlena *new_value;
478 if (oldtup != NULL)
481 * For UPDATE get the old and new values of this attribute
483 old_value = (struct varlena *) DatumGetPointer(toast_oldvalues[i]);
484 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
487 * If the old value is an external stored one, check if it has
488 * changed so we have to delete it later.
490 if (att[i]->attlen == -1 && !toast_oldisnull[i] &&
491 VARATT_IS_EXTERNAL(old_value))
493 if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) ||
494 memcmp((char *) old_value, (char *) new_value,
495 VARSIZE_EXTERNAL(old_value)) != 0)
498 * The old external stored value isn't needed any more
499 * after the update
501 toast_delold[i] = true;
502 need_delold = true;
504 else
507 * This attribute isn't changed by this update so we reuse
508 * the original reference to the old value in the new
509 * tuple.
511 toast_action[i] = 'p';
512 continue;
516 else
519 * For INSERT simply get the new value
521 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
525 * Handle NULL attributes
527 if (toast_isnull[i])
529 toast_action[i] = 'p';
530 has_nulls = true;
531 continue;
535 * Now look at varlena attributes
537 if (att[i]->attlen == -1)
540 * If the table's attribute says PLAIN always, force it so.
542 if (att[i]->attstorage == 'p')
543 toast_action[i] = 'p';
546 * We took care of UPDATE above, so any external value we find
547 * still in the tuple must be someone else's we cannot reuse.
548 * Fetch it back (without decompression, unless we are forcing
549 * PLAIN storage). If necessary, we'll push it out as a new
550 * external value below.
552 if (VARATT_IS_EXTERNAL(new_value))
554 if (att[i]->attstorage == 'p')
555 new_value = heap_tuple_untoast_attr(new_value);
556 else
557 new_value = heap_tuple_fetch_attr(new_value);
558 toast_values[i] = PointerGetDatum(new_value);
559 toast_free[i] = true;
560 need_change = true;
561 need_free = true;
565 * Remember the size of this attribute
567 toast_sizes[i] = VARSIZE_ANY(new_value);
569 else
572 * Not a varlena attribute, plain storage always
574 toast_action[i] = 'p';
578 /* ----------
579 * Compress and/or save external until data fits into target length
581 * 1: Inline compress attributes with attstorage 'x', and store very
582 * large attributes with attstorage 'x' or 'e' external immediately
583 * 2: Store attributes with attstorage 'x' or 'e' external
584 * 3: Inline compress attributes with attstorage 'm'
585 * 4: Store attributes with attstorage 'm' external
586 * ----------
589 /* compute header overhead --- this should match heap_form_tuple() */
590 hoff = offsetof(HeapTupleHeaderData, t_bits);
591 if (has_nulls)
592 hoff += BITMAPLEN(numAttrs);
593 if (newtup->t_data->t_infomask & HEAP_HASOID)
594 hoff += sizeof(Oid);
595 hoff = MAXALIGN(hoff);
596 Assert(hoff == newtup->t_data->t_hoff);
597 /* now convert to a limit on the tuple data size */
598 maxDataLen = TOAST_TUPLE_TARGET - hoff;
601 * Look for attributes with attstorage 'x' to compress. Also find large
602 * attributes with attstorage 'x' or 'e', and store them external.
604 while (heap_compute_data_size(tupleDesc,
605 toast_values, toast_isnull) > maxDataLen)
607 int biggest_attno = -1;
608 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
609 Datum old_value;
610 Datum new_value;
613 * Search for the biggest yet unprocessed internal attribute
615 for (i = 0; i < numAttrs; i++)
617 if (toast_action[i] != ' ')
618 continue;
619 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
620 continue; /* can't happen, toast_action would be 'p' */
621 if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
622 continue;
623 if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
624 continue;
625 if (toast_sizes[i] > biggest_size)
627 biggest_attno = i;
628 biggest_size = toast_sizes[i];
632 if (biggest_attno < 0)
633 break;
636 * Attempt to compress it inline, if it has attstorage 'x'
638 i = biggest_attno;
639 if (att[i]->attstorage == 'x')
641 old_value = toast_values[i];
642 new_value = toast_compress_datum(old_value);
644 if (DatumGetPointer(new_value) != NULL)
646 /* successful compression */
647 if (toast_free[i])
648 pfree(DatumGetPointer(old_value));
649 toast_values[i] = new_value;
650 toast_free[i] = true;
651 toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
652 need_change = true;
653 need_free = true;
655 else
657 /* incompressible, ignore on subsequent compression passes */
658 toast_action[i] = 'x';
661 else
663 /* has attstorage 'e', ignore on subsequent compression passes */
664 toast_action[i] = 'x';
668 * If this value is by itself more than maxDataLen (after compression
669 * if any), push it out to the toast table immediately, if possible.
670 * This avoids uselessly compressing other fields in the common case
671 * where we have one long field and several short ones.
673 * XXX maybe the threshold should be less than maxDataLen?
675 if (toast_sizes[i] > maxDataLen &&
676 rel->rd_rel->reltoastrelid != InvalidOid)
678 old_value = toast_values[i];
679 toast_action[i] = 'p';
680 toast_values[i] = toast_save_datum(rel, toast_values[i],
681 use_wal, use_fsm);
682 if (toast_free[i])
683 pfree(DatumGetPointer(old_value));
684 toast_free[i] = true;
685 need_change = true;
686 need_free = true;
691 * Second we look for attributes of attstorage 'x' or 'e' that are still
692 * inline. But skip this if there's no toast table to push them to.
694 while (heap_compute_data_size(tupleDesc,
695 toast_values, toast_isnull) > maxDataLen &&
696 rel->rd_rel->reltoastrelid != InvalidOid)
698 int biggest_attno = -1;
699 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
700 Datum old_value;
702 /*------
703 * Search for the biggest yet inlined attribute with
704 * attstorage equals 'x' or 'e'
705 *------
707 for (i = 0; i < numAttrs; i++)
709 if (toast_action[i] == 'p')
710 continue;
711 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
712 continue; /* can't happen, toast_action would be 'p' */
713 if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
714 continue;
715 if (toast_sizes[i] > biggest_size)
717 biggest_attno = i;
718 biggest_size = toast_sizes[i];
722 if (biggest_attno < 0)
723 break;
726 * Store this external
728 i = biggest_attno;
729 old_value = toast_values[i];
730 toast_action[i] = 'p';
731 toast_values[i] = toast_save_datum(rel, toast_values[i],
732 use_wal, use_fsm);
733 if (toast_free[i])
734 pfree(DatumGetPointer(old_value));
735 toast_free[i] = true;
737 need_change = true;
738 need_free = true;
742 * Round 3 - this time we take attributes with storage 'm' into
743 * compression
745 while (heap_compute_data_size(tupleDesc,
746 toast_values, toast_isnull) > maxDataLen)
748 int biggest_attno = -1;
749 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
750 Datum old_value;
751 Datum new_value;
754 * Search for the biggest yet uncompressed internal attribute
756 for (i = 0; i < numAttrs; i++)
758 if (toast_action[i] != ' ')
759 continue;
760 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
761 continue; /* can't happen, toast_action would be 'p' */
762 if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
763 continue;
764 if (att[i]->attstorage != 'm')
765 continue;
766 if (toast_sizes[i] > biggest_size)
768 biggest_attno = i;
769 biggest_size = toast_sizes[i];
773 if (biggest_attno < 0)
774 break;
777 * Attempt to compress it inline
779 i = biggest_attno;
780 old_value = toast_values[i];
781 new_value = toast_compress_datum(old_value);
783 if (DatumGetPointer(new_value) != NULL)
785 /* successful compression */
786 if (toast_free[i])
787 pfree(DatumGetPointer(old_value));
788 toast_values[i] = new_value;
789 toast_free[i] = true;
790 toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
791 need_change = true;
792 need_free = true;
794 else
796 /* incompressible, ignore on subsequent compression passes */
797 toast_action[i] = 'x';
802 * Finally we store attributes of type 'm' external, if possible.
804 while (heap_compute_data_size(tupleDesc,
805 toast_values, toast_isnull) > maxDataLen &&
806 rel->rd_rel->reltoastrelid != InvalidOid)
808 int biggest_attno = -1;
809 int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
810 Datum old_value;
812 /*--------
813 * Search for the biggest yet inlined attribute with
814 * attstorage = 'm'
815 *--------
817 for (i = 0; i < numAttrs; i++)
819 if (toast_action[i] == 'p')
820 continue;
821 if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
822 continue; /* can't happen, toast_action would be 'p' */
823 if (att[i]->attstorage != 'm')
824 continue;
825 if (toast_sizes[i] > biggest_size)
827 biggest_attno = i;
828 biggest_size = toast_sizes[i];
832 if (biggest_attno < 0)
833 break;
836 * Store this external
838 i = biggest_attno;
839 old_value = toast_values[i];
840 toast_action[i] = 'p';
841 toast_values[i] = toast_save_datum(rel, toast_values[i],
842 use_wal, use_fsm);
843 if (toast_free[i])
844 pfree(DatumGetPointer(old_value));
845 toast_free[i] = true;
847 need_change = true;
848 need_free = true;
852 * In the case we toasted any values, we need to build a new heap tuple
853 * with the changed values.
855 if (need_change)
857 HeapTupleHeader olddata = newtup->t_data;
858 HeapTupleHeader new_data;
859 int32 new_len;
860 int32 new_data_len;
863 * Calculate the new size of the tuple. Header size should not
864 * change, but data size might.
866 new_len = offsetof(HeapTupleHeaderData, t_bits);
867 if (has_nulls)
868 new_len += BITMAPLEN(numAttrs);
869 if (olddata->t_infomask & HEAP_HASOID)
870 new_len += sizeof(Oid);
871 new_len = MAXALIGN(new_len);
872 Assert(new_len == olddata->t_hoff);
873 new_data_len = heap_compute_data_size(tupleDesc,
874 toast_values, toast_isnull);
875 new_len += new_data_len;
878 * Allocate and zero the space needed, and fill HeapTupleData fields.
880 result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_len);
881 result_tuple->t_len = new_len;
882 result_tuple->t_self = newtup->t_self;
883 result_tuple->t_tableOid = newtup->t_tableOid;
884 new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
885 result_tuple->t_data = new_data;
888 * Put the existing tuple header and the changed values into place
890 memcpy(new_data, olddata, olddata->t_hoff);
892 heap_fill_tuple(tupleDesc,
893 toast_values,
894 toast_isnull,
895 (char *) new_data + olddata->t_hoff,
896 new_data_len,
897 &(new_data->t_infomask),
898 has_nulls ? new_data->t_bits : NULL);
900 else
901 result_tuple = newtup;
904 * Free allocated temp values
906 if (need_free)
907 for (i = 0; i < numAttrs; i++)
908 if (toast_free[i])
909 pfree(DatumGetPointer(toast_values[i]));
912 * Delete external values from the old tuple
914 if (need_delold)
915 for (i = 0; i < numAttrs; i++)
916 if (toast_delold[i])
917 toast_delete_datum(rel, toast_oldvalues[i]);
919 return result_tuple;
923 /* ----------
924 * toast_flatten_tuple_attribute -
926 * If a Datum is of composite type, "flatten" it to contain no toasted fields.
927 * This must be invoked on any potentially-composite field that is to be
928 * inserted into a tuple. Doing this preserves the invariant that toasting
929 * goes only one level deep in a tuple.
931 * Note that flattening does not mean expansion of short-header varlenas,
932 * so in one sense toasting is allowed within composite datums.
933 * ----------
935 Datum
936 toast_flatten_tuple_attribute(Datum value,
937 Oid typeId, int32 typeMod)
939 TupleDesc tupleDesc;
940 HeapTupleHeader olddata;
941 HeapTupleHeader new_data;
942 int32 new_len;
943 int32 new_data_len;
944 HeapTupleData tmptup;
945 Form_pg_attribute *att;
946 int numAttrs;
947 int i;
948 bool need_change = false;
949 bool has_nulls = false;
950 Datum toast_values[MaxTupleAttributeNumber];
951 bool toast_isnull[MaxTupleAttributeNumber];
952 bool toast_free[MaxTupleAttributeNumber];
955 * See if it's a composite type, and get the tupdesc if so.
957 tupleDesc = lookup_rowtype_tupdesc_noerror(typeId, typeMod, true);
958 if (tupleDesc == NULL)
959 return value; /* not a composite type */
961 att = tupleDesc->attrs;
962 numAttrs = tupleDesc->natts;
965 * Break down the tuple into fields.
967 olddata = DatumGetHeapTupleHeader(value);
968 Assert(typeId == HeapTupleHeaderGetTypeId(olddata));
969 Assert(typeMod == HeapTupleHeaderGetTypMod(olddata));
970 /* Build a temporary HeapTuple control structure */
971 tmptup.t_len = HeapTupleHeaderGetDatumLength(olddata);
972 ItemPointerSetInvalid(&(tmptup.t_self));
973 tmptup.t_tableOid = InvalidOid;
974 tmptup.t_data = olddata;
976 Assert(numAttrs <= MaxTupleAttributeNumber);
977 heap_deform_tuple(&tmptup, tupleDesc, toast_values, toast_isnull);
979 memset(toast_free, 0, numAttrs * sizeof(bool));
981 for (i = 0; i < numAttrs; i++)
984 * Look at non-null varlena attributes
986 if (toast_isnull[i])
987 has_nulls = true;
988 else if (att[i]->attlen == -1)
990 struct varlena *new_value;
992 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
993 if (VARATT_IS_EXTERNAL(new_value) ||
994 VARATT_IS_COMPRESSED(new_value))
996 new_value = heap_tuple_untoast_attr(new_value);
997 toast_values[i] = PointerGetDatum(new_value);
998 toast_free[i] = true;
999 need_change = true;
1005 * If nothing to untoast, just return the original tuple.
1007 if (!need_change)
1009 ReleaseTupleDesc(tupleDesc);
1010 return value;
1014 * Calculate the new size of the tuple. Header size should not change,
1015 * but data size might.
1017 new_len = offsetof(HeapTupleHeaderData, t_bits);
1018 if (has_nulls)
1019 new_len += BITMAPLEN(numAttrs);
1020 if (olddata->t_infomask & HEAP_HASOID)
1021 new_len += sizeof(Oid);
1022 new_len = MAXALIGN(new_len);
1023 Assert(new_len == olddata->t_hoff);
1024 new_data_len = heap_compute_data_size(tupleDesc,
1025 toast_values, toast_isnull);
1026 new_len += new_data_len;
1028 new_data = (HeapTupleHeader) palloc0(new_len);
1031 * Put the tuple header and the changed values into place
1033 memcpy(new_data, olddata, olddata->t_hoff);
1035 HeapTupleHeaderSetDatumLength(new_data, new_len);
1037 heap_fill_tuple(tupleDesc,
1038 toast_values,
1039 toast_isnull,
1040 (char *) new_data + olddata->t_hoff,
1041 new_data_len,
1042 &(new_data->t_infomask),
1043 has_nulls ? new_data->t_bits : NULL);
1046 * Free allocated temp values
1048 for (i = 0; i < numAttrs; i++)
1049 if (toast_free[i])
1050 pfree(DatumGetPointer(toast_values[i]));
1051 ReleaseTupleDesc(tupleDesc);
1053 return PointerGetDatum(new_data);
1057 /* ----------
1058 * toast_compress_datum -
1060 * Create a compressed version of a varlena datum
1062 * If we fail (ie, compressed result is actually bigger than original)
1063 * then return NULL. We must not use compressed data if it'd expand
1064 * the tuple!
1066 * We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without
1067 * copying them. But we can't handle external or compressed datums.
1068 * ----------
1070 Datum
1071 toast_compress_datum(Datum value)
1073 struct varlena *tmp;
1074 int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
1076 Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
1077 Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
1080 * No point in wasting a palloc cycle if value size is out of the
1081 * allowed range for compression
1083 if (valsize < PGLZ_strategy_default->min_input_size ||
1084 valsize > PGLZ_strategy_default->max_input_size)
1085 return PointerGetDatum(NULL);
1087 tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize));
1090 * We recheck the actual size even if pglz_compress() reports success,
1091 * because it might be satisfied with having saved as little as one byte
1092 * in the compressed data --- which could turn into a net loss once you
1093 * consider header and alignment padding. Worst case, the compressed
1094 * format might require three padding bytes (plus header, which is included
1095 * in VARSIZE(tmp)), whereas the uncompressed format would take only one
1096 * header byte and no padding if the value is short enough. So we insist
1097 * on a savings of more than 2 bytes to ensure we have a gain.
1099 if (pglz_compress(VARDATA_ANY(DatumGetPointer(value)), valsize,
1100 (PGLZ_Header *) tmp, PGLZ_strategy_default) &&
1101 VARSIZE(tmp) < valsize - 2)
1103 /* successful compression */
1104 return PointerGetDatum(tmp);
1106 else
1108 /* incompressible data */
1109 pfree(tmp);
1110 return PointerGetDatum(NULL);
1115 /* ----------
1116 * toast_save_datum -
1118 * Save one single datum into the secondary relation and return
1119 * a Datum reference for it.
1120 * ----------
1122 static Datum
1123 toast_save_datum(Relation rel, Datum value,
1124 bool use_wal, bool use_fsm)
1126 Relation toastrel;
1127 Relation toastidx;
1128 HeapTuple toasttup;
1129 TupleDesc toasttupDesc;
1130 Datum t_values[3];
1131 bool t_isnull[3];
1132 CommandId mycid = GetCurrentCommandId(true);
1133 struct varlena *result;
1134 struct varatt_external toast_pointer;
1135 struct
1137 struct varlena hdr;
1138 char data[TOAST_MAX_CHUNK_SIZE]; /* make struct big enough */
1139 int32 align_it; /* ensure struct is aligned well enough */
1140 } chunk_data;
1141 int32 chunk_size;
1142 int32 chunk_seq = 0;
1143 char *data_p;
1144 int32 data_todo;
1145 Pointer dval = DatumGetPointer(value);
1148 * Open the toast relation and its index. We can use the index to check
1149 * uniqueness of the OID we assign to the toasted item, even though it has
1150 * additional columns besides OID.
1152 toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
1153 toasttupDesc = toastrel->rd_att;
1154 toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock);
1157 * Get the data pointer and length, and compute va_rawsize and va_extsize.
1159 * va_rawsize is the size of the equivalent fully uncompressed datum, so
1160 * we have to adjust for short headers.
1162 * va_extsize is the actual size of the data payload in the toast records.
1164 if (VARATT_IS_SHORT(dval))
1166 data_p = VARDATA_SHORT(dval);
1167 data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
1168 toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */
1169 toast_pointer.va_extsize = data_todo;
1171 else if (VARATT_IS_COMPRESSED(dval))
1173 data_p = VARDATA(dval);
1174 data_todo = VARSIZE(dval) - VARHDRSZ;
1175 /* rawsize in a compressed datum is just the size of the payload */
1176 toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ;
1177 toast_pointer.va_extsize = data_todo;
1178 /* Assert that the numbers look like it's compressed */
1179 Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
1181 else
1183 data_p = VARDATA(dval);
1184 data_todo = VARSIZE(dval) - VARHDRSZ;
1185 toast_pointer.va_rawsize = VARSIZE(dval);
1186 toast_pointer.va_extsize = data_todo;
1189 toast_pointer.va_valueid = GetNewOidWithIndex(toastrel,
1190 RelationGetRelid(toastidx),
1191 (AttrNumber) 1);
1192 toast_pointer.va_toastrelid = rel->rd_rel->reltoastrelid;
1195 * Initialize constant parts of the tuple data
1197 t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid);
1198 t_values[2] = PointerGetDatum(&chunk_data);
1199 t_isnull[0] = false;
1200 t_isnull[1] = false;
1201 t_isnull[2] = false;
1204 * Split up the item into chunks
1206 while (data_todo > 0)
1209 * Calculate the size of this chunk
1211 chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);
1214 * Build a tuple and store it
1216 t_values[1] = Int32GetDatum(chunk_seq++);
1217 SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
1218 memcpy(VARDATA(&chunk_data), data_p, chunk_size);
1219 toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
1221 heap_insert(toastrel, toasttup, mycid, use_wal, use_fsm);
1224 * Create the index entry. We cheat a little here by not using
1225 * FormIndexDatum: this relies on the knowledge that the index columns
1226 * are the same as the initial columns of the table.
1228 * Note also that there had better not be any user-created index on
1229 * the TOAST table, since we don't bother to update anything else.
1231 index_insert(toastidx, t_values, t_isnull,
1232 &(toasttup->t_self),
1233 toastrel, toastidx->rd_index->indisunique);
1236 * Free memory
1238 heap_freetuple(toasttup);
1241 * Move on to next chunk
1243 data_todo -= chunk_size;
1244 data_p += chunk_size;
1248 * Done - close toast relation
1250 index_close(toastidx, RowExclusiveLock);
1251 heap_close(toastrel, RowExclusiveLock);
1254 * Create the TOAST pointer value that we'll return
1256 result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
1257 SET_VARSIZE_EXTERNAL(result, TOAST_POINTER_SIZE);
1258 memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
1260 return PointerGetDatum(result);
1264 /* ----------
1265 * toast_delete_datum -
1267 * Delete a single external stored value.
1268 * ----------
1270 static void
1271 toast_delete_datum(Relation rel, Datum value)
1273 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
1274 struct varatt_external toast_pointer;
1275 Relation toastrel;
1276 Relation toastidx;
1277 ScanKeyData toastkey;
1278 SysScanDesc toastscan;
1279 HeapTuple toasttup;
1281 if (!VARATT_IS_EXTERNAL(attr))
1282 return;
1284 /* Must copy to access aligned fields */
1285 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1288 * Open the toast relation and its index
1290 toastrel = heap_open(toast_pointer.va_toastrelid, RowExclusiveLock);
1291 toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock);
1294 * Setup a scan key to find chunks with matching va_valueid
1296 ScanKeyInit(&toastkey,
1297 (AttrNumber) 1,
1298 BTEqualStrategyNumber, F_OIDEQ,
1299 ObjectIdGetDatum(toast_pointer.va_valueid));
1302 * Find all the chunks. (We don't actually care whether we see them in
1303 * sequence or not, but since we've already locked the index we might
1304 * as well use systable_beginscan_ordered.)
1306 toastscan = systable_beginscan_ordered(toastrel, toastidx,
1307 SnapshotToast, 1, &toastkey);
1308 while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1311 * Have a chunk, delete it
1313 simple_heap_delete(toastrel, &toasttup->t_self);
1317 * End scan and close relations
1319 systable_endscan_ordered(toastscan);
1320 index_close(toastidx, RowExclusiveLock);
1321 heap_close(toastrel, RowExclusiveLock);
1325 /* ----------
1326 * toast_fetch_datum -
1328 * Reconstruct an in memory Datum from the chunks saved
1329 * in the toast relation
1330 * ----------
1332 static struct varlena *
1333 toast_fetch_datum(struct varlena * attr)
1335 Relation toastrel;
1336 Relation toastidx;
1337 ScanKeyData toastkey;
1338 SysScanDesc toastscan;
1339 HeapTuple ttup;
1340 TupleDesc toasttupDesc;
1341 struct varlena *result;
1342 struct varatt_external toast_pointer;
1343 int32 ressize;
1344 int32 residx,
1345 nextidx;
1346 int32 numchunks;
1347 Pointer chunk;
1348 bool isnull;
1349 char *chunkdata;
1350 int32 chunksize;
1352 /* Must copy to access aligned fields */
1353 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1355 ressize = toast_pointer.va_extsize;
1356 numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
1358 result = (struct varlena *) palloc(ressize + VARHDRSZ);
1360 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
1361 SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ);
1362 else
1363 SET_VARSIZE(result, ressize + VARHDRSZ);
1366 * Open the toast relation and its index
1368 toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
1369 toasttupDesc = toastrel->rd_att;
1370 toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock);
1373 * Setup a scan key to fetch from the index by va_valueid
1375 ScanKeyInit(&toastkey,
1376 (AttrNumber) 1,
1377 BTEqualStrategyNumber, F_OIDEQ,
1378 ObjectIdGetDatum(toast_pointer.va_valueid));
1381 * Read the chunks by index
1383 * Note that because the index is actually on (valueid, chunkidx) we will
1384 * see the chunks in chunkidx order, even though we didn't explicitly ask
1385 * for it.
1387 nextidx = 0;
1389 toastscan = systable_beginscan_ordered(toastrel, toastidx,
1390 SnapshotToast, 1, &toastkey);
1391 while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1394 * Have a chunk, extract the sequence number and the data
1396 residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
1397 Assert(!isnull);
1398 chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
1399 Assert(!isnull);
1400 if (!VARATT_IS_EXTENDED(chunk))
1402 chunksize = VARSIZE(chunk) - VARHDRSZ;
1403 chunkdata = VARDATA(chunk);
1405 else if (VARATT_IS_SHORT(chunk))
1407 /* could happen due to heap_form_tuple doing its thing */
1408 chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
1409 chunkdata = VARDATA_SHORT(chunk);
1411 else
1413 /* should never happen */
1414 elog(ERROR, "found toasted toast chunk for toast value %u in %s",
1415 toast_pointer.va_valueid,
1416 RelationGetRelationName(toastrel));
1417 chunksize = 0; /* keep compiler quiet */
1418 chunkdata = NULL;
1422 * Some checks on the data we've found
1424 if (residx != nextidx)
1425 elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
1426 residx, nextidx,
1427 toast_pointer.va_valueid,
1428 RelationGetRelationName(toastrel));
1429 if (residx < numchunks - 1)
1431 if (chunksize != TOAST_MAX_CHUNK_SIZE)
1432 elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
1433 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
1434 residx, numchunks,
1435 toast_pointer.va_valueid,
1436 RelationGetRelationName(toastrel));
1438 else if (residx == numchunks - 1)
1440 if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
1441 elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s",
1442 chunksize,
1443 (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE),
1444 residx,
1445 toast_pointer.va_valueid,
1446 RelationGetRelationName(toastrel));
1448 else
1449 elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
1450 residx,
1451 0, numchunks - 1,
1452 toast_pointer.va_valueid,
1453 RelationGetRelationName(toastrel));
1456 * Copy the data into proper place in our result
1458 memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE,
1459 chunkdata,
1460 chunksize);
1462 nextidx++;
1466 * Final checks that we successfully fetched the datum
1468 if (nextidx != numchunks)
1469 elog(ERROR, "missing chunk number %d for toast value %u in %s",
1470 nextidx,
1471 toast_pointer.va_valueid,
1472 RelationGetRelationName(toastrel));
1475 * End scan and close relations
1477 systable_endscan_ordered(toastscan);
1478 index_close(toastidx, AccessShareLock);
1479 heap_close(toastrel, AccessShareLock);
1481 return result;
1484 /* ----------
1485 * toast_fetch_datum_slice -
1487 * Reconstruct a segment of a Datum from the chunks saved
1488 * in the toast relation
1489 * ----------
1491 static struct varlena *
1492 toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
1494 Relation toastrel;
1495 Relation toastidx;
1496 ScanKeyData toastkey[3];
1497 int nscankeys;
1498 SysScanDesc toastscan;
1499 HeapTuple ttup;
1500 TupleDesc toasttupDesc;
1501 struct varlena *result;
1502 struct varatt_external toast_pointer;
1503 int32 attrsize;
1504 int32 residx;
1505 int32 nextidx;
1506 int numchunks;
1507 int startchunk;
1508 int endchunk;
1509 int32 startoffset;
1510 int32 endoffset;
1511 int totalchunks;
1512 Pointer chunk;
1513 bool isnull;
1514 char *chunkdata;
1515 int32 chunksize;
1516 int32 chcpystrt;
1517 int32 chcpyend;
1519 Assert(VARATT_IS_EXTERNAL(attr));
1521 /* Must copy to access aligned fields */
1522 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1525 * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
1526 * we can't return a compressed datum which is meaningful to toast later
1528 Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
1530 attrsize = toast_pointer.va_extsize;
1531 totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
1533 if (sliceoffset >= attrsize)
1535 sliceoffset = 0;
1536 length = 0;
1539 if (((sliceoffset + length) > attrsize) || length < 0)
1540 length = attrsize - sliceoffset;
1542 result = (struct varlena *) palloc(length + VARHDRSZ);
1544 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
1545 SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
1546 else
1547 SET_VARSIZE(result, length + VARHDRSZ);
1549 if (length == 0)
1550 return result; /* Can save a lot of work at this point! */
1552 startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
1553 endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
1554 numchunks = (endchunk - startchunk) + 1;
1556 startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
1557 endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;
1560 * Open the toast relation and its index
1562 toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
1563 toasttupDesc = toastrel->rd_att;
1564 toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock);
1567 * Setup a scan key to fetch from the index. This is either two keys or
1568 * three depending on the number of chunks.
1570 ScanKeyInit(&toastkey[0],
1571 (AttrNumber) 1,
1572 BTEqualStrategyNumber, F_OIDEQ,
1573 ObjectIdGetDatum(toast_pointer.va_valueid));
1576 * Use equality condition for one chunk, a range condition otherwise:
1578 if (numchunks == 1)
1580 ScanKeyInit(&toastkey[1],
1581 (AttrNumber) 2,
1582 BTEqualStrategyNumber, F_INT4EQ,
1583 Int32GetDatum(startchunk));
1584 nscankeys = 2;
1586 else
1588 ScanKeyInit(&toastkey[1],
1589 (AttrNumber) 2,
1590 BTGreaterEqualStrategyNumber, F_INT4GE,
1591 Int32GetDatum(startchunk));
1592 ScanKeyInit(&toastkey[2],
1593 (AttrNumber) 2,
1594 BTLessEqualStrategyNumber, F_INT4LE,
1595 Int32GetDatum(endchunk));
1596 nscankeys = 3;
1600 * Read the chunks by index
1602 * The index is on (valueid, chunkidx) so they will come in order
1604 nextidx = startchunk;
1605 toastscan = systable_beginscan_ordered(toastrel, toastidx,
1606 SnapshotToast, nscankeys, toastkey);
1607 while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1610 * Have a chunk, extract the sequence number and the data
1612 residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
1613 Assert(!isnull);
1614 chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
1615 Assert(!isnull);
1616 if (!VARATT_IS_EXTENDED(chunk))
1618 chunksize = VARSIZE(chunk) - VARHDRSZ;
1619 chunkdata = VARDATA(chunk);
1621 else if (VARATT_IS_SHORT(chunk))
1623 /* could happen due to heap_form_tuple doing its thing */
1624 chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
1625 chunkdata = VARDATA_SHORT(chunk);
1627 else
1629 /* should never happen */
1630 elog(ERROR, "found toasted toast chunk for toast value %u in %s",
1631 toast_pointer.va_valueid,
1632 RelationGetRelationName(toastrel));
1633 chunksize = 0; /* keep compiler quiet */
1634 chunkdata = NULL;
1638 * Some checks on the data we've found
1640 if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
1641 elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
1642 residx, nextidx,
1643 toast_pointer.va_valueid,
1644 RelationGetRelationName(toastrel));
1645 if (residx < totalchunks - 1)
1647 if (chunksize != TOAST_MAX_CHUNK_SIZE)
1648 elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice",
1649 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
1650 residx, totalchunks,
1651 toast_pointer.va_valueid,
1652 RelationGetRelationName(toastrel));
1654 else if (residx == totalchunks - 1)
1656 if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
1657 elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice",
1658 chunksize,
1659 (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE),
1660 residx,
1661 toast_pointer.va_valueid,
1662 RelationGetRelationName(toastrel));
1664 else
1665 elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
1666 residx,
1667 0, totalchunks - 1,
1668 toast_pointer.va_valueid,
1669 RelationGetRelationName(toastrel));
1672 * Copy the data into proper place in our result
1674 chcpystrt = 0;
1675 chcpyend = chunksize - 1;
1676 if (residx == startchunk)
1677 chcpystrt = startoffset;
1678 if (residx == endchunk)
1679 chcpyend = endoffset;
1681 memcpy(VARDATA(result) +
1682 (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
1683 chunkdata + chcpystrt,
1684 (chcpyend - chcpystrt) + 1);
1686 nextidx++;
1690 * Final checks that we successfully fetched the datum
1692 if (nextidx != (endchunk + 1))
1693 elog(ERROR, "missing chunk number %d for toast value %u in %s",
1694 nextidx,
1695 toast_pointer.va_valueid,
1696 RelationGetRelationName(toastrel));
1699 * End scan and close relations
1701 systable_endscan_ordered(toastscan);
1702 index_close(toastidx, AccessShareLock);
1703 heap_close(toastrel, AccessShareLock);
1705 return result;