/*-------------------------------------------------------------------------
 *
 * PostgreSQL tuple-id (TID) bitmap package
 *
 * This module provides bitmap data structures that are spiritually
 * similar to Bitmapsets, but are specially adapted to store sets of
 * tuple identifiers (TIDs), or ItemPointers.  In particular, the division
 * of an ItemPointer into BlockNumber and OffsetNumber is catered for.
 * Also, since we wish to be able to store very large tuple sets in
 * memory with this data structure, we support "lossy" storage, in which
 * we no longer remember individual tuple offsets on a page but only the
 * fact that a particular page needs to be visited.
 *
 * The "lossy" storage uses one bit per disk page, so at the standard 8K
 * BLCKSZ, we can represent all pages in 64Gb of disk space in about 1Mb
 * of memory.  People pushing around tables of that size should have a
 * couple of Mb to spare, so we don't worry about providing a second level
 * of lossiness.  In theory we could fall back to page ranges at some
 * point, but for now that seems useless complexity.
 *
 * We also support the notion of candidate matches, or rechecking.  This
 * means we know that a search need visit only some tuples on a page,
 * but we are not certain that all of those tuples are real matches.
 * So the eventual heap scan must recheck the quals for these tuples only,
 * rather than rechecking the quals for all tuples on the page as in the
 * lossy-bitmap case.  Rechecking can be specified when TIDs are inserted
 * into a bitmap, and it can also happen internally when we AND a lossy
 * and a non-lossy page.
 *
 * Copyright (c) 2003-2008, PostgreSQL Global Development Group
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <limits.h>

#include "access/htup.h"
#include "nodes/bitmapset.h"
#include "nodes/tidbitmap.h"
#include "storage/bufpage.h"
#include "utils/hsearch.h"
/*
 * The maximum number of tuples per page is not large (typically 256 with
 * 8K pages, or 1024 with 32K pages).  So there's not much point in making
 * the per-page bitmaps variable size.  We just legislate that the size
 * is this:
 */
#define MAX_TUPLES_PER_PAGE  MaxHeapTuplesPerPage
/*
 * When we have to switch over to lossy storage, we use a data structure
 * with one bit per page, where all pages having the same number DIV
 * PAGES_PER_CHUNK are aggregated into one chunk.  When a chunk is present
 * and has the bit set for a given page, there must not be a per-page entry
 * for that page in the page table.
 *
 * We actually store both exact pages and lossy chunks in the same hash
 * table, using identical data structures.  (This is because dynahash.c's
 * memory management doesn't allow space to be transferred easily from one
 * hashtable to another.)  Therefore it's best if PAGES_PER_CHUNK is the
 * same as MAX_TUPLES_PER_PAGE, or at least not too different.  But we
 * also want PAGES_PER_CHUNK to be a power of 2 to avoid expensive integer
 * remainder operations.  So, define it like this:
 */
#define PAGES_PER_CHUNK  (BLCKSZ / 32)
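
/*
 * To work through the numbers behind the header comment (these figures are
 * only illustrative and shift with a nonstandard BLCKSZ): with the default
 * 8K BLCKSZ, PAGES_PER_CHUNK = 8192 / 32 = 256, so one lossy chunk
 * summarizes 256 heap pages, i.e. 2Mb of table.  A 64Gb table contains
 * 64Gb / 8Kb = 8M pages; at one bit per page that is roughly 1Mb of bitmap,
 * which is where the "about 1Mb of memory" estimate above comes from.
 */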
/* We use BITS_PER_BITMAPWORD and typedef bitmapword from nodes/bitmapset.h */

#define WORDNUM(x)	((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x)	((x) % BITS_PER_BITMAPWORD)

/* number of active words for an exact page: */
#define WORDS_PER_PAGE	((MAX_TUPLES_PER_PAGE - 1) / BITS_PER_BITMAPWORD + 1)
/* number of active words for a lossy chunk: */
#define WORDS_PER_CHUNK  ((PAGES_PER_CHUNK - 1) / BITS_PER_BITMAPWORD + 1)
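
/*
 * As an illustration of this addressing scheme (assuming the common case of
 * a 32-bit bitmapword): tuple offset 70 on an exact page is stored as bit
 * k = 70 - 1 = 69, so WORDNUM(69) = 2 and BITNUM(69) = 5, and the insertion
 * amounts to words[2] |= ((bitmapword) 1 << 5).
 */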
/*
 * The hashtable entries are represented by this data structure.  For
 * an exact page, blockno is the page number and bit k of the bitmap
 * represents tuple offset k+1.  For a lossy chunk, blockno is the first
 * page in the chunk (this must be a multiple of PAGES_PER_CHUNK) and
 * bit k represents page blockno+k.  Note that it is not possible to
 * have exact storage for the first page of a chunk if we are using
 * lossy storage for any page in the chunk's range, since the same
 * hashtable entry has to serve both purposes.
 *
 * recheck is used only on exact pages --- it indicates that although
 * only the stated tuples need be checked, the full index qual condition
 * must be checked for each (ie, these are candidate matches).
 */
typedef struct PagetableEntry
{
	BlockNumber blockno;		/* page number (hashtable key) */
	bool		ischunk;		/* T = lossy storage, F = exact */
	bool		recheck;		/* should the tuples be rechecked? */
	bitmapword	words[Max(WORDS_PER_PAGE, WORDS_PER_CHUNK)];
} PagetableEntry;
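
/*
 * Example (again assuming 8K BLCKSZ, hence PAGES_PER_CHUNK = 256): an exact
 * entry for heap page 1000 has blockno = 1000 and one bit per matching
 * offset, while lossy coverage of the same page lives in the chunk entry
 * with blockno = 1000 - (1000 % 256) = 768, ischunk = true, and bit 232 set
 * in its words[] array.
 */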
/*
 * dynahash.c is optimized for relatively large, long-lived hash tables.
 * This is not ideal for TIDBitMap, particularly when we are using a bitmap
 * scan on the inside of a nestloop join: a bitmap may well live only long
 * enough to accumulate one entry in such cases.  We therefore avoid creating
 * an actual hashtable until we need two pagetable entries.  When just one
 * pagetable entry is needed, we store it in a fixed field of TIDBitMap.
 * (NOTE: we don't get rid of the hashtable if the bitmap later shrinks down
 * to zero or one page again.  So, status can be TBM_HASH even when nentries
 * is zero or one.)
 */
typedef enum
{
	TBM_EMPTY,					/* no hashtable, nentries == 0 */
	TBM_ONE_PAGE,				/* entry1 contains the single entry */
	TBM_HASH					/* pagetable is valid, entry1 is not */
} TBMStatus;
/*
 * Here is the representation for a whole TIDBitMap:
 */
struct TIDBitmap
{
	NodeTag		type;			/* to make it a valid Node */
	MemoryContext mcxt;			/* memory context containing me */
	TBMStatus	status;			/* see codes above */
	HTAB	   *pagetable;		/* hash table of PagetableEntry's */
	int			nentries;		/* number of entries in pagetable */
	int			maxentries;		/* limit on same to meet maxbytes */
	int			npages;			/* number of exact entries in pagetable */
	int			nchunks;		/* number of lossy entries in pagetable */
	bool		iterating;		/* tbm_begin_iterate called? */
	PagetableEntry entry1;		/* used when status == TBM_ONE_PAGE */
	/* the remaining fields are used while producing sorted output: */
	PagetableEntry **spages;	/* sorted exact-page list, or NULL */
	PagetableEntry **schunks;	/* sorted lossy-chunk list, or NULL */
	int			spageptr;		/* next spages index */
	int			schunkptr;		/* next schunks index */
	int			schunkbit;		/* next bit to check in current schunk */
	TBMIterateResult output;	/* MUST BE LAST (because variable-size) */
};
/* Local function prototypes */
static void tbm_union_page(TIDBitmap *a, const PagetableEntry *bpage);
static bool tbm_intersect_page(TIDBitmap *a, PagetableEntry *apage,
				   const TIDBitmap *b);
static const PagetableEntry *tbm_find_pageentry(const TIDBitmap *tbm,
				   BlockNumber pageno);
static PagetableEntry *tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno);
static bool tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno);
static void tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno);
static void tbm_lossify(TIDBitmap *tbm);
static int	tbm_comparator(const void *left, const void *right);
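
/*
 * A rough sketch of how a caller (for instance, a bitmap index scan) is
 * expected to drive this module.  The variable names below are illustrative
 * only and error handling is omitted:
 *
 *		TIDBitmap  *tbm = tbm_create(work_mem * 1024L);
 *
 *		while ( ... more matching index tuples ... )
 *			tbm_add_tuples(tbm, &itup_tid, 1, false);
 *
 *		tbm_begin_iterate(tbm);
 *		... call tbm_iterate(tbm) until it returns NULL ...
 *		tbm_free(tbm);
 */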
/*
 * tbm_create - create an initially-empty bitmap
 *
 * The bitmap will live in the memory context that is CurrentMemoryContext
 * at the time of this call.  It will be limited to (approximately) maxbytes
 * total memory consumption.
 */
tbm_create(long maxbytes)

	/*
	 * Create the TIDBitmap struct, with enough trailing space to serve the
	 * needs of the TBMIterateResult sub-struct.
	 */
	tbm = (TIDBitmap *) palloc(sizeof(TIDBitmap) +
							   MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber));

	/* Zero all the fixed fields */
	MemSetAligned(tbm, 0, sizeof(TIDBitmap));

	tbm->type = T_TIDBitmap;	/* Set NodeTag */
	tbm->mcxt = CurrentMemoryContext;
	tbm->status = TBM_EMPTY;

	/*
	 * Estimate number of hashtable entries we can have within maxbytes.  This
	 * estimates the hash overhead at MAXALIGN(sizeof(HASHELEMENT)) plus a
	 * pointer per hash entry, which is crude but good enough for our purpose.
	 * Also count an extra Pointer per entry for the arrays created during
	 * iteration readout.
	 */
	nbuckets = maxbytes /
		(MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(sizeof(PagetableEntry))
		 + sizeof(Pointer) + sizeof(Pointer));
	nbuckets = Min(nbuckets, INT_MAX - 1);		/* safety limit */
	nbuckets = Max(nbuckets, 16);		/* sanity limit */
	tbm->maxentries = (int) nbuckets;
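
	/*
	 * A very rough illustration of this estimate (actual sizes vary with
	 * platform, alignment, and BLCKSZ): if the per-entry charge works out
	 * to something like 80 bytes, a maxbytes budget of 1Mb yields a
	 * maxentries limit on the order of 13,000 page-table entries before
	 * tbm_lossify() starts compressing exact pages into chunks.
	 */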
/*
 * Actually create the hashtable.  Since this is a moderately expensive
 * proposition, we don't do it until we have to.
 */
tbm_create_pagetable(TIDBitmap *tbm)

	Assert(tbm->status != TBM_HASH);
	Assert(tbm->pagetable == NULL);

	/* Create the hashtable proper */
	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
	hash_ctl.keysize = sizeof(BlockNumber);
	hash_ctl.entrysize = sizeof(PagetableEntry);
	hash_ctl.hash = tag_hash;
	hash_ctl.hcxt = tbm->mcxt;
	tbm->pagetable = hash_create("TIDBitmap",
								 128,	/* start small and extend */
								 &hash_ctl,
								 HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

	/* If entry1 is valid, push it into the hashtable */
	if (tbm->status == TBM_ONE_PAGE)

		PagetableEntry *page;

		page = (PagetableEntry *) hash_search(tbm->pagetable,
											  (void *) &tbm->entry1.blockno,
											  HASH_ENTER, &found);
		memcpy(page, &tbm->entry1, sizeof(PagetableEntry));

	tbm->status = TBM_HASH;
/*
 * tbm_free - free a TIDBitmap
 */
tbm_free(TIDBitmap *tbm)

	hash_destroy(tbm->pagetable);
/*
 * tbm_add_tuples - add some tuple IDs to a TIDBitmap
 *
 * If recheck is true, then the recheck flag will be set in the
 * TBMIterateResult when any of these tuples are reported out.
 */
tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids,
			   bool recheck)

	Assert(!tbm->iterating);
	for (i = 0; i < ntids; i++)

		BlockNumber blk = ItemPointerGetBlockNumber(tids + i);
		OffsetNumber off = ItemPointerGetOffsetNumber(tids + i);
		PagetableEntry *page;

		/* safety check to ensure we don't overrun bit array bounds */
		if (off < 1 || off > MAX_TUPLES_PER_PAGE)
			elog(ERROR, "tuple offset out of range: %u", off);

		if (tbm_page_is_lossy(tbm, blk))
			continue;			/* whole page is already marked */

		page = tbm_get_pageentry(tbm, blk);

			/* The page is a lossy chunk header, set bit for itself */
			wordnum = bitnum = 0;

			/* Page is exact, so set bit for individual tuple */
			wordnum = WORDNUM(off - 1);
			bitnum = BITNUM(off - 1);

		page->words[wordnum] |= ((bitmapword) 1 << bitnum);
		page->recheck |= recheck;

		if (tbm->nentries > tbm->maxentries)
/*
 * tbm_union - set union
 *
 * a is modified in-place, b is not changed
 */
tbm_union(TIDBitmap *a, const TIDBitmap *b)

	Assert(!a->iterating);
	/* Nothing to do if b is empty */
	if (b->nentries == 0)

	/* Scan through chunks and pages in b, merge into a */
	if (b->status == TBM_ONE_PAGE)
		tbm_union_page(a, &b->entry1);

		HASH_SEQ_STATUS status;
		PagetableEntry *bpage;

		Assert(b->status == TBM_HASH);
		hash_seq_init(&status, b->pagetable);
		while ((bpage = (PagetableEntry *) hash_seq_search(&status)) != NULL)
			tbm_union_page(a, bpage);
/* Process one page of b during a union op */
tbm_union_page(TIDBitmap *a, const PagetableEntry *bpage)

	PagetableEntry *apage;

		/* Scan b's chunk, mark each indicated page lossy in a */
		for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)

			bitmapword	w = bpage->words[wordnum];

				pg = bpage->blockno + (wordnum * BITS_PER_BITMAPWORD);
					tbm_mark_page_lossy(a, pg);

	else if (tbm_page_is_lossy(a, bpage->blockno))

		/* page is already lossy in a, nothing to do */

		apage = tbm_get_pageentry(a, bpage->blockno);

			/* The page is a lossy chunk header, set bit for itself */
			apage->words[0] |= ((bitmapword) 1 << 0);

			/* Both pages are exact, merge at the bit level */
			for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
				apage->words[wordnum] |= bpage->words[wordnum];
			apage->recheck |= bpage->recheck;

	if (a->nentries > a->maxentries)
/*
 * tbm_intersect - set intersection
 *
 * a is modified in-place, b is not changed
 */
tbm_intersect(TIDBitmap *a, const TIDBitmap *b)

	Assert(!a->iterating);
	/* Nothing to do if a is empty */
	if (a->nentries == 0)

	/* Scan through chunks and pages in a, try to match to b */
	if (a->status == TBM_ONE_PAGE)

		if (tbm_intersect_page(a, &a->entry1, b))

			/* Page is now empty, remove it from a */
			Assert(!a->entry1.ischunk);
			Assert(a->nentries == 0);
			a->status = TBM_EMPTY;

		HASH_SEQ_STATUS status;
		PagetableEntry *apage;

		Assert(a->status == TBM_HASH);
		hash_seq_init(&status, a->pagetable);
		while ((apage = (PagetableEntry *) hash_seq_search(&status)) != NULL)

			if (tbm_intersect_page(a, apage, b))

				/* Page or chunk is now empty, remove it from a */
				if (hash_search(a->pagetable,
								(void *) &apage->blockno,
								HASH_REMOVE, NULL) == NULL)
					elog(ERROR, "hash table corrupted");
/*
 * Process one page of a during an intersection op
 *
 * Returns TRUE if apage is now empty and should be deleted from a
 */
tbm_intersect_page(TIDBitmap *a, PagetableEntry *apage, const TIDBitmap *b)

	const PagetableEntry *bpage;

		/* Scan each bit in chunk, try to clear */
		bool		candelete = true;

		for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)

			bitmapword	w = apage->words[wordnum];

				pg = apage->blockno + (wordnum * BITS_PER_BITMAPWORD);
					if (!tbm_page_is_lossy(b, pg) &&
						tbm_find_pageentry(b, pg) == NULL)

						/* Page is not in b at all, lose lossy bit */
						neww &= ~((bitmapword) 1 << bitnum);

				apage->words[wordnum] = neww;

	else if (tbm_page_is_lossy(b, apage->blockno))

		/*
		 * Some of the tuples in 'a' might not satisfy the quals for 'b',
		 * but because the page 'b' is lossy, we don't know which ones.
		 * Therefore we mark 'a' as requiring rechecks, to indicate that
		 * at most those tuples set in 'a' are matches.
		 */
		apage->recheck = true;

		bool		candelete = true;

		bpage = tbm_find_pageentry(b, apage->blockno);

			/* Both pages are exact, merge at the bit level */
			Assert(!bpage->ischunk);
			for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)

				apage->words[wordnum] &= bpage->words[wordnum];
				if (apage->words[wordnum] != 0)

			apage->recheck |= bpage->recheck;

		/* If there is no matching b page, we can just delete the a page */
/*
 * tbm_is_empty - is a TIDBitmap completely empty?
 */
tbm_is_empty(const TIDBitmap *tbm)

	return (tbm->nentries == 0);
/*
 * tbm_begin_iterate - prepare to iterate through a TIDBitmap
 *
 * NB: after this is called, it is no longer allowed to modify the contents
 * of the bitmap.  However, you can call this multiple times to scan the
 * contents repeatedly.
 */
tbm_begin_iterate(TIDBitmap *tbm)

	HASH_SEQ_STATUS status;
	PagetableEntry *page;

	tbm->iterating = true;

	/*
	 * Reset iteration pointers.
	 */

	/*
	 * Nothing else to do if no entries, nor if we don't have a hashtable.
	 */
	if (tbm->nentries == 0 || tbm->status != TBM_HASH)

	/*
	 * Create and fill the sorted page lists if we didn't already.
	 */
	if (!tbm->spages && tbm->npages > 0)
		tbm->spages = (PagetableEntry **)
			MemoryContextAlloc(tbm->mcxt,
							   tbm->npages * sizeof(PagetableEntry *));
	if (!tbm->schunks && tbm->nchunks > 0)
		tbm->schunks = (PagetableEntry **)
			MemoryContextAlloc(tbm->mcxt,
							   tbm->nchunks * sizeof(PagetableEntry *));

	hash_seq_init(&status, tbm->pagetable);
	npages = nchunks = 0;
	while ((page = (PagetableEntry *) hash_seq_search(&status)) != NULL)

			tbm->schunks[nchunks++] = page;

			tbm->spages[npages++] = page;

	Assert(npages == tbm->npages);
	Assert(nchunks == tbm->nchunks);
	qsort(tbm->spages, npages, sizeof(PagetableEntry *), tbm_comparator);
	qsort(tbm->schunks, nchunks, sizeof(PagetableEntry *), tbm_comparator);
/*
 * tbm_iterate - scan through next page of a TIDBitmap
 *
 * Returns a TBMIterateResult representing one page, or NULL if there are
 * no more pages to scan.  Pages are guaranteed to be delivered in numerical
 * order.  If result->ntuples < 0, then the bitmap is "lossy" and failed to
 * remember the exact tuples to look at on this page --- the caller must
 * examine all tuples on the page and check if they meet the intended
 * condition.  If result->recheck is true, only the indicated tuples need
 * be examined, but the condition must be rechecked anyway.  (For ease of
 * testing, recheck is always set true when ntuples < 0.)
 */
tbm_iterate(TIDBitmap *tbm)

	TBMIterateResult *output = &(tbm->output);

	Assert(tbm->iterating);

	/*
	 * If lossy chunk pages remain, make sure we've advanced schunkptr/
	 * schunkbit to the next set bit.
	 */
	while (tbm->schunkptr < tbm->nchunks)

		PagetableEntry *chunk = tbm->schunks[tbm->schunkptr];
		int			schunkbit = tbm->schunkbit;

		while (schunkbit < PAGES_PER_CHUNK)

			int			wordnum = WORDNUM(schunkbit);
			int			bitnum = BITNUM(schunkbit);

			if ((chunk->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)

		if (schunkbit < PAGES_PER_CHUNK)

			tbm->schunkbit = schunkbit;

		/* advance to next chunk */

	/*
	 * If both chunk and per-page data remain, must output the numerically
	 * earlier page.
	 */
	if (tbm->schunkptr < tbm->nchunks)

		PagetableEntry *chunk = tbm->schunks[tbm->schunkptr];
		BlockNumber chunk_blockno;

		chunk_blockno = chunk->blockno + tbm->schunkbit;
		if (tbm->spageptr >= tbm->npages ||
			chunk_blockno < tbm->spages[tbm->spageptr]->blockno)

			/* Return a lossy page indicator from the chunk */
			output->blockno = chunk_blockno;
			output->ntuples = -1;
			output->recheck = true;

	if (tbm->spageptr < tbm->npages)

		PagetableEntry *page;

		/* In ONE_PAGE state, we don't allocate an spages[] array */
		if (tbm->status == TBM_ONE_PAGE)

			page = tbm->spages[tbm->spageptr];

		/* scan bitmap to extract individual offset numbers */
		for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)

			bitmapword	w = page->words[wordnum];

				int			off = wordnum * BITS_PER_BITMAPWORD + 1;

					output->offsets[ntuples++] = (OffsetNumber) off;

		output->blockno = page->blockno;
		output->ntuples = ntuples;
		output->recheck = page->recheck;

	/* Nothing more in the bitmap */
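
/*
 * A minimal sketch of the consumer loop implied by the contract above
 * (names are illustrative; a real caller also has to fetch and lock the
 * heap pages it visits):
 *
 *		TBMIterateResult *res;
 *
 *		tbm_begin_iterate(tbm);
 *		while ((res = tbm_iterate(tbm)) != NULL)
 *		{
 *			if (res->ntuples >= 0)
 *				... visit only res->offsets[0 .. res->ntuples-1] on page
 *					res->blockno, rechecking the quals if res->recheck ...
 *			else
 *				... lossy result: visit every tuple on page res->blockno
 *					and recheck the quals for each one ...
 *		}
 */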
/*
 * tbm_find_pageentry - find a PagetableEntry for the pageno
 *
 * Returns NULL if there is no non-lossy entry for the pageno.
 */
static const PagetableEntry *
tbm_find_pageentry(const TIDBitmap *tbm, BlockNumber pageno)

	const PagetableEntry *page;

	if (tbm->nentries == 0)		/* in case pagetable doesn't exist */

	if (tbm->status == TBM_ONE_PAGE)

		if (page->blockno != pageno)

		Assert(!page->ischunk);

	page = (PagetableEntry *) hash_search(tbm->pagetable,
										  (void *) &pageno,
										  HASH_FIND, NULL);

		return NULL;			/* don't want a lossy chunk header */
/*
 * tbm_get_pageentry - find or create a PagetableEntry for the pageno
 *
 * If new, the entry is marked as an exact (non-chunk) entry.
 *
 * This may cause the table to exceed the desired memory size.  It is
 * up to the caller to call tbm_lossify() at the next safe point if so.
 */
static PagetableEntry *
tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno)

	PagetableEntry *page;

	if (tbm->status == TBM_EMPTY)

		/* Use the fixed slot */
		tbm->status = TBM_ONE_PAGE;

		if (tbm->status == TBM_ONE_PAGE)

			if (page->blockno == pageno)

			/* Time to switch from one page to a hashtable */
			tbm_create_pagetable(tbm);

		/* Look up or create an entry */
		page = (PagetableEntry *) hash_search(tbm->pagetable,
											  (void *) &pageno,
											  HASH_ENTER, &found);

	/* Initialize it if not present before */

		MemSet(page, 0, sizeof(PagetableEntry));
		page->blockno = pageno;
		/* must count it too */
/*
 * tbm_page_is_lossy - is the page marked as lossily stored?
 */
tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno)

	PagetableEntry *page;
	BlockNumber chunk_pageno;

	/* we can skip the lookup if there are no lossy chunks */
	if (tbm->nchunks == 0)

	Assert(tbm->status == TBM_HASH);

	bitno = pageno % PAGES_PER_CHUNK;
	chunk_pageno = pageno - bitno;
	page = (PagetableEntry *) hash_search(tbm->pagetable,
										  (void *) &chunk_pageno,
										  HASH_FIND, NULL);
	if (page != NULL && page->ischunk)

		int			wordnum = WORDNUM(bitno);
		int			bitnum = BITNUM(bitno);

		if ((page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
/*
 * tbm_mark_page_lossy - mark the page number as lossily stored
 *
 * This may cause the table to exceed the desired memory size.  It is
 * up to the caller to call tbm_lossify() at the next safe point if so.
 */
tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno)

	PagetableEntry *page;
	BlockNumber chunk_pageno;

	/* We force the bitmap into hashtable mode whenever it's lossy */
	if (tbm->status != TBM_HASH)
		tbm_create_pagetable(tbm);

	bitno = pageno % PAGES_PER_CHUNK;
	chunk_pageno = pageno - bitno;

	/*
	 * Remove any extant non-lossy entry for the page.  If the page is its own
	 * chunk header, however, we skip this and handle the case below.
	 */
	if (hash_search(tbm->pagetable,
					(void *) &pageno,
					HASH_REMOVE, NULL) != NULL)

		/* It was present, so adjust counts */
		tbm->npages--;			/* assume it must have been non-lossy */

	/* Look up or create entry for chunk-header page */
	page = (PagetableEntry *) hash_search(tbm->pagetable,
										  (void *) &chunk_pageno,
										  HASH_ENTER, &found);

	/* Initialize it if not present before */

		MemSet(page, 0, sizeof(PagetableEntry));
		page->blockno = chunk_pageno;
		page->ischunk = true;
		/* must count it too */

	else if (!page->ischunk)

		/* chunk header page was formerly non-lossy, make it lossy */
		MemSet(page, 0, sizeof(PagetableEntry));
		page->blockno = chunk_pageno;
		page->ischunk = true;
		/* we assume it had some tuple bit(s) set, so mark it lossy */
		page->words[0] = ((bitmapword) 1 << 0);

	/* Now set the original target page's bit */
	wordnum = WORDNUM(bitno);
	bitnum = BITNUM(bitno);
	page->words[wordnum] |= ((bitmapword) 1 << bitnum);
/*
 * tbm_lossify - lose some information to get back under the memory limit
 */
tbm_lossify(TIDBitmap *tbm)

	HASH_SEQ_STATUS status;
	PagetableEntry *page;

	/*
	 * XXX Really stupid implementation: this just lossifies pages in
	 * essentially random order.  We should be paying some attention to the
	 * number of bits set in each page, instead.  Also it might be a good idea
	 * to lossify more than the minimum number of pages during each call.
	 */
	Assert(!tbm->iterating);
	Assert(tbm->status == TBM_HASH);

	hash_seq_init(&status, tbm->pagetable);
	while ((page = (PagetableEntry *) hash_seq_search(&status)) != NULL)

			continue;			/* already a chunk header */

		/*
		 * If the page would become a chunk header, we won't save anything by
		 * converting it to lossy, so skip it.
		 */
		if ((page->blockno % PAGES_PER_CHUNK) == 0)

		/* This does the dirty work ... */
		tbm_mark_page_lossy(tbm, page->blockno);

		if (tbm->nentries <= tbm->maxentries)

			/* we have done enough */
			hash_seq_term(&status);

		/*
		 * Note: tbm_mark_page_lossy may have inserted a lossy chunk into the
		 * hashtable.  We can continue the same seq_search scan since we do
		 * not care whether we visit lossy chunks or not.
		 */
/*
 * qsort comparator to handle PagetableEntry pointers.
 */
tbm_comparator(const void *left, const void *right)

	BlockNumber l = (*((const PagetableEntry **) left))->blockno;
	BlockNumber r = (*((const PagetableEntry **) right))->blockno;

	if (l < r)
		return -1;
	else if (l > r)
		return 1;
	return 0;