1 /*-------------------------------------------------------------------------
4 * Routines to support bitmapped scans of relations
6 * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 * special snapshots). The reason is that since index and heap scans are
9 * decoupled, there can be no assurance that the index tuple prompting a
10 * visit to a particular heap TID still exists when the visit is made.
11 * Therefore the tuple might not exist anymore either (which is OK because
12 * heap_fetch will cope) --- but worse, the tuple slot could have been
13 * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 * certain to fail the time qual and so it will not be mistakenly returned,
15 * but with anything else we might return a tuple that doesn't meet the
16 * required index qual conditions.
19 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
20 * Portions Copyright (c) 1994, Regents of the University of California
24 * src/backend/executor/nodeBitmapHeapscan.c
26 *-------------------------------------------------------------------------
30 * ExecBitmapHeapScan scans a relation using bitmap info
31 * ExecBitmapHeapNext workhorse for above
32 * ExecInitBitmapHeapScan creates and initializes state info.
33 * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 * ExecEndBitmapHeapScan releases all storage.
40 #include "access/relscan.h"
41 #include "access/tableam.h"
42 #include "access/visibilitymap.h"
43 #include "executor/executor.h"
44 #include "executor/nodeBitmapHeapscan.h"
45 #include "miscadmin.h"
47 #include "storage/bufmgr.h"
48 #include "utils/rel.h"
49 #include "utils/spccache.h"
/*
 * Forward declarations for this file's local routines.
 *
 * NOTE(review): this extraction is line-mangled — the stray leading numbers
 * are the original file's line numbers fused into the text, and some lines
 * are missing (e.g. the tail of the BitmapPrefetch prototype after the comma).
 * Verify against upstream nodeBitmapHeapscan.c before editing.
 */
51 static TupleTableSlot
*BitmapHeapNext(BitmapHeapScanState
*node
);
52 static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState
*pstate
);
53 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState
*node
);
54 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState
*node
);
/* NOTE(review): second parameter of this prototype was dropped by the extraction. */
55 static inline void BitmapPrefetch(BitmapHeapScanState
*node
,
57 static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState
*pstate
);
60 /* ----------------------------------------------------------------
63 * Retrieve next tuple from the BitmapHeapScan node's currentRelation
64 * ----------------------------------------------------------------
/*
 * BitmapHeapNext -- workhorse for ExecBitmapHeapScan.
 *
 * Returns the next tuple slot from the bitmap heap scan, or an empty slot
 * when the bitmap is exhausted.  On the first call it runs the outer
 * (bitmap-building) subplan via MultiExecProcNode, prepares serial or shared
 * TBM iterators (the parallel leader publishes them through
 * tbm_prepare_shared_iterate and wakes waiters), creates the table scan
 * descriptor via table_beginscan_bm, and then loops fetching tuples with
 * table_scan_bitmap_next_tuple / table_scan_bitmap_next_block, rechecking
 * bitmapqualorig for lossy pages and driving the prefetch machinery.
 *
 * NOTE(review): this body is a line-mangled extraction — leading numbers are
 * original line numbers, and several lines (braces, #ifdef USE_PREFETCH
 * guards, if/else headers, the ereport() head at the out-of-sync check) are
 * missing.  Kept byte-identical; verify against upstream before editing.
 */
66 static TupleTableSlot
*
67 BitmapHeapNext(BitmapHeapScanState
*node
)
69 ExprContext
*econtext
;
73 ParallelBitmapHeapState
*pstate
= node
->pstate
;
74 dsa_area
*dsa
= node
->ss
.ps
.state
->es_query_dsa
;
77 * extract necessary information from index scan node
79 econtext
= node
->ss
.ps
.ps_ExprContext
;
80 slot
= node
->ss
.ss_ScanTupleSlot
;
81 scan
= node
->ss
.ss_currentScanDesc
;
85 * If we haven't yet performed the underlying index scan, do it, and begin
86 * the iteration over the bitmap.
88 * For prefetching, we use *two* iterators, one for the pages we are
89 * actually scanning and another that runs ahead of the first for
90 * prefetching. node->prefetch_pages tracks exactly how many pages ahead
91 * the prefetch iterator is. Also, node->prefetch_target tracks the
92 * desired prefetch distance, which starts small and increases up to the
93 * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
94 * a scan that stops after a few tuples because of a LIMIT.
/* One-time setup: build the TIDBitmap by running the outer subplan. */
96 if (!node
->initialized
)
98 TBMIterator tbmiterator
;
102 tbm
= (TIDBitmap
*) MultiExecProcNode(outerPlanState(node
));
104 if (!tbm
|| !IsA(tbm
, TIDBitmap
))
105 elog(ERROR
, "unrecognized result from subplan");
/* Parallel case: only the elected leader builds the bitmap. */
109 else if (BitmapShouldInitializeSharedState(pstate
))
112 * The leader will immediately come out of the function, but
113 * others will be blocked until leader populates the TBM and wakes
116 tbm
= (TIDBitmap
*) MultiExecProcNode(outerPlanState(node
));
117 if (!tbm
|| !IsA(tbm
, TIDBitmap
))
118 elog(ERROR
, "unrecognized result from subplan");
123 * Prepare to iterate over the TBM. This will return the
124 * dsa_pointer of the iterator state which will be used by
125 * multiple processes to iterate jointly.
127 pstate
->tbmiterator
= tbm_prepare_shared_iterate(tbm
);
130 if (node
->prefetch_maximum
> 0)
132 pstate
->prefetch_iterator
=
133 tbm_prepare_shared_iterate(tbm
);
135 #endif /* USE_PREFETCH */
137 /* We have initialized the shared state so wake up others. */
138 BitmapDoneInitializingSharedState(pstate
);
141 tbmiterator
= tbm_begin_iterate(tbm
, dsa
,
143 pstate
->tbmiterator
:
147 if (node
->prefetch_maximum
> 0)
148 node
->prefetch_iterator
=
149 tbm_begin_iterate(tbm
, dsa
,
151 pstate
->prefetch_iterator
:
153 #endif /* USE_PREFETCH */
156 * If this is the first scan of the underlying table, create the table
157 * scan descriptor and begin the scan.
161 bool need_tuples
= false;
164 * We can potentially skip fetching heap pages if we do not need
165 * any columns of the table, either for checking non-indexable
166 * quals or for returning data. This test is a bit simplistic, as
167 * it checks the stronger condition that there's no qual or return
168 * tlist at all. But in most cases it's probably not worth working
171 need_tuples
= (node
->ss
.ps
.plan
->qual
!= NIL
||
172 node
->ss
.ps
.plan
->targetlist
!= NIL
);
174 scan
= table_beginscan_bm(node
->ss
.ss_currentRelation
,
175 node
->ss
.ps
.state
->es_snapshot
,
180 node
->ss
.ss_currentScanDesc
= scan
;
183 scan
->st
.rs_tbmiterator
= tbmiterator
;
184 node
->initialized
= true;
/* Main loop: emit each tuple of the current bitmap page in turn. */
191 while (table_scan_bitmap_next_tuple(scan
, slot
))
194 * Continuing in previously obtained page.
197 CHECK_FOR_INTERRUPTS();
202 * Try to prefetch at least a few pages even before we get to the
203 * second page if we don't stop reading after the first tuple.
207 if (node
->prefetch_target
< node
->prefetch_maximum
)
208 node
->prefetch_target
++;
210 else if (pstate
->prefetch_target
< node
->prefetch_maximum
)
212 /* take spinlock while updating shared state */
213 SpinLockAcquire(&pstate
->mutex
);
214 if (pstate
->prefetch_target
< node
->prefetch_maximum
)
215 pstate
->prefetch_target
++;
216 SpinLockRelease(&pstate
->mutex
);
218 #endif /* USE_PREFETCH */
221 * We issue prefetch requests *after* fetching the current page to
222 * try to avoid having prefetching interfere with the main I/O.
223 * Also, this should happen only when we have determined there is
224 * still something to do on the current page, else we may
225 * uselessly prefetch the same page we are just about to request
228 BitmapPrefetch(node
, scan
);
231 * If we are using lossy info, we have to recheck the qual
232 * conditions at every tuple.
236 econtext
->ecxt_scantuple
= slot
;
237 if (!ExecQualAndReset(node
->bitmapqualorig
, econtext
))
239 /* Fails recheck, so drop it and loop back for another */
240 InstrCountFiltered2(node
, 1);
241 ExecClearTuple(slot
);
246 /* OK to return this tuple */
252 BitmapAdjustPrefetchIterator(node
);
255 * Returns false if the bitmap is exhausted and there are no further
256 * blocks we need to scan.
/* Advance to the next bitmap page; exits the scan when the bitmap is done. */
258 if (!table_scan_bitmap_next_block(scan
, &node
->blockno
,
260 &node
->stats
.lossy_pages
,
261 &node
->stats
.exact_pages
))
265 * If serial, we can error out if the prefetch block doesn't stay
266 * ahead of the current block.
268 if (node
->pstate
== NULL
&&
269 !tbm_exhausted(&node
->prefetch_iterator
) &&
270 node
->prefetch_blockno
< node
->blockno
)
272 "prefetch and main iterators are out of sync. pfblockno: %d. blockno: %d",
273 node
->prefetch_blockno
, node
->blockno
);
275 /* Adjust the prefetch target */
276 BitmapAdjustPrefetchTarget(node
);
280 * if we get here it means we are at the end of the scan..
282 return ExecClearTuple(slot
);
286 * BitmapDoneInitializingSharedState - Shared state is initialized
288 * By this time the leader has already populated the TBM and initialized the
289 * shared state so wake up other processes.
/*
 * BitmapDoneInitializingSharedState -- mark shared bitmap state finished.
 *
 * Called by the parallel leader after it has populated the TBM: sets
 * pstate->state to BM_FINISHED under the mutex, then broadcasts on the
 * condition variable to wake workers sleeping in
 * BitmapShouldInitializeSharedState.
 *
 * NOTE(review): line-mangled extraction (numbers are original line numbers;
 * the "static inline void" header and braces were dropped).  Kept byte-identical.
 */
292 BitmapDoneInitializingSharedState(ParallelBitmapHeapState
*pstate
)
294 SpinLockAcquire(&pstate
->mutex
);
295 pstate
->state
= BM_FINISHED
;
296 SpinLockRelease(&pstate
->mutex
);
297 ConditionVariableBroadcast(&pstate
->cv
);
301 * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
303 * We keep track of how far the prefetch iterator is ahead of the main
304 * iterator in prefetch_pages. For each block the main iterator returns, we
305 * decrement prefetch_pages.
/*
 * BitmapAdjustPrefetchIterator -- keep the prefetch iterator ahead of the
 * main iterator.
 *
 * Serial case: if prefetch_pages > 0 the main iterator has closed the gap by
 * one page, so just decrement; otherwise advance the prefetch iterator with
 * tbm_iterate and record its blockno.  Parallel case: the same bookkeeping is
 * done on pstate->prefetch_pages under the spinlock, and no blockno
 * validation is possible (another worker may process the prefetched block).
 *
 * NOTE(review): line-mangled extraction; the #ifdef USE_PREFETCH opening,
 * braces, the serial/parallel branch headers, and the "InvalidBlockNumber"
 * arms of the ?: expressions are missing.  Kept byte-identical; verify
 * against upstream before editing.
 */
308 BitmapAdjustPrefetchIterator(BitmapHeapScanState
*node
)
311 ParallelBitmapHeapState
*pstate
= node
->pstate
;
312 TBMIterateResult
*tbmpre
;
316 TBMIterator
*prefetch_iterator
= &node
->prefetch_iterator
;
318 if (node
->prefetch_pages
> 0)
320 /* The main iterator has closed the distance by one page */
321 node
->prefetch_pages
--;
323 else if (!tbm_exhausted(prefetch_iterator
))
325 tbmpre
= tbm_iterate(prefetch_iterator
);
326 node
->prefetch_blockno
= tbmpre
? tbmpre
->blockno
:
333 * XXX: There is a known issue with keeping the prefetch and current block
334 * iterators in sync for parallel bitmap table scans. This can lead to
335 * prefetching blocks that have already been read. See the discussion
337 * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
338 * Note that moving the call site of BitmapAdjustPrefetchIterator()
339 * exacerbates the effects of this bug.
341 if (node
->prefetch_maximum
> 0)
343 TBMIterator
*prefetch_iterator
= &node
->prefetch_iterator
;
345 SpinLockAcquire(&pstate
->mutex
);
346 if (pstate
->prefetch_pages
> 0)
348 pstate
->prefetch_pages
--;
349 SpinLockRelease(&pstate
->mutex
);
353 /* Release the mutex before iterating */
354 SpinLockRelease(&pstate
->mutex
);
357 * In case of shared mode, we can not ensure that the current
358 * blockno of the main iterator and that of the prefetch iterator
359 * are same. It's possible that whatever blockno we are
360 * prefetching will be processed by another process. Therefore,
361 * we don't validate the blockno here as we do in non-parallel
364 if (!tbm_exhausted(prefetch_iterator
))
366 tbmpre
= tbm_iterate(prefetch_iterator
);
367 node
->prefetch_blockno
= tbmpre
? tbmpre
->blockno
:
372 #endif /* USE_PREFETCH */
376 * BitmapAdjustPrefetchTarget - Adjust the prefetch target
378 * Increase prefetch target if it's not yet at the max. Note that
379 * we will increase it to zero after fetching the very first
380 * page/tuple, then to one after the second tuple is fetched, then
381 * it doubles as later pages are fetched.
/*
 * BitmapAdjustPrefetchTarget -- grow the prefetch distance.
 *
 * Ramp-up schedule (applied to node->prefetch_target serially, or to
 * pstate->prefetch_target under the mutex in parallel mode): stay put at
 * prefetch_maximum; jump to the maximum once past half of it; otherwise
 * double; and (per the trailing ++ fragments) increment from the initial
 * -1/0 values.  The parallel path does an unlocked pre-check to avoid
 * needless spinlock acquisitions.
 *
 * NOTE(review): line-mangled extraction; #ifdef USE_PREFETCH opening,
 * braces and the serial/parallel branch headers are missing.  Kept
 * byte-identical; verify against upstream before editing.
 */
384 BitmapAdjustPrefetchTarget(BitmapHeapScanState
*node
)
387 ParallelBitmapHeapState
*pstate
= node
->pstate
;
391 if (node
->prefetch_target
>= node
->prefetch_maximum
)
392 /* don't increase any further */ ;
393 else if (node
->prefetch_target
>= node
->prefetch_maximum
/ 2)
394 node
->prefetch_target
= node
->prefetch_maximum
;
395 else if (node
->prefetch_target
> 0)
396 node
->prefetch_target
*= 2;
398 node
->prefetch_target
++;
402 /* Do an unlocked check first to save spinlock acquisitions. */
403 if (pstate
->prefetch_target
< node
->prefetch_maximum
)
405 SpinLockAcquire(&pstate
->mutex
);
406 if (pstate
->prefetch_target
>= node
->prefetch_maximum
)
407 /* don't increase any further */ ;
408 else if (pstate
->prefetch_target
>= node
->prefetch_maximum
/ 2)
409 pstate
->prefetch_target
= node
->prefetch_maximum
;
410 else if (pstate
->prefetch_target
> 0)
411 pstate
->prefetch_target
*= 2;
413 pstate
->prefetch_target
++;
414 SpinLockRelease(&pstate
->mutex
);
416 #endif /* USE_PREFETCH */
420 * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
/*
 * BitmapPrefetch -- issue prefetch requests while prefetch_pages lags
 * prefetch_target.
 *
 * Serial path: loop advancing the prefetch iterator with tbm_iterate,
 * ending it when exhausted; for each page, skip the PrefetchBuffer call
 * when tuples aren't needed (no SO_NEED_TUPLES) and the page is all-visible
 * per VM_ALL_VISIBLE.  Parallel path: claim pages to prefetch by bumping
 * pstate->prefetch_pages under the mutex, then iterate and prefetch the
 * same way.
 *
 * NOTE(review): line-mangled extraction; #ifdef USE_PREFETCH opening,
 * braces, the serial/parallel branch headers, the null-tbmpre checks before
 * tbm_end_iterate, the pvmbuffer argument of VM_ALL_VISIBLE, and the
 * "if (!skip_fetch)" guards around PrefetchBuffer are missing.  Kept
 * byte-identical; verify against upstream before editing.
 */
423 BitmapPrefetch(BitmapHeapScanState
*node
, TableScanDesc scan
)
426 ParallelBitmapHeapState
*pstate
= node
->pstate
;
430 TBMIterator
*prefetch_iterator
= &node
->prefetch_iterator
;
432 if (!tbm_exhausted(prefetch_iterator
))
434 while (node
->prefetch_pages
< node
->prefetch_target
)
436 TBMIterateResult
*tbmpre
= tbm_iterate(prefetch_iterator
);
441 /* No more pages to prefetch */
442 tbm_end_iterate(prefetch_iterator
);
445 node
->prefetch_pages
++;
446 node
->prefetch_blockno
= tbmpre
->blockno
;
449 * If we expect not to have to actually read this heap page,
450 * skip this prefetch call, but continue to run the prefetch
451 * logic normally. (Would it be better not to increment
454 skip_fetch
= (!(scan
->rs_flags
& SO_NEED_TUPLES
) &&
456 VM_ALL_VISIBLE(node
->ss
.ss_currentRelation
,
461 PrefetchBuffer(scan
->rs_rd
, MAIN_FORKNUM
, tbmpre
->blockno
);
/* Parallel path: claim pages under the shared mutex before prefetching. */
468 if (pstate
->prefetch_pages
< pstate
->prefetch_target
)
470 TBMIterator
*prefetch_iterator
= &node
->prefetch_iterator
;
472 if (!tbm_exhausted(prefetch_iterator
))
476 TBMIterateResult
*tbmpre
;
477 bool do_prefetch
= false;
481 * Recheck under the mutex. If some other process has already
482 * done enough prefetching then we need not to do anything.
484 SpinLockAcquire(&pstate
->mutex
);
485 if (pstate
->prefetch_pages
< pstate
->prefetch_target
)
487 pstate
->prefetch_pages
++;
490 SpinLockRelease(&pstate
->mutex
);
495 tbmpre
= tbm_iterate(prefetch_iterator
);
498 /* No more pages to prefetch */
499 tbm_end_iterate(prefetch_iterator
);
503 node
->prefetch_blockno
= tbmpre
->blockno
;
505 /* As above, skip prefetch if we expect not to need page */
506 skip_fetch
= (!(scan
->rs_flags
& SO_NEED_TUPLES
) &&
508 VM_ALL_VISIBLE(node
->ss
.ss_currentRelation
,
513 PrefetchBuffer(scan
->rs_rd
, MAIN_FORKNUM
, tbmpre
->blockno
);
517 #endif /* USE_PREFETCH */
521 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
/*
 * BitmapHeapRecheck -- EvalPlanQual recheck callback.
 *
 * Points the expression context's scan tuple at the given slot and
 * re-evaluates the original bitmap quals (node->bitmapqualorig) against it,
 * returning the boolean result.
 *
 * NOTE(review): line-mangled extraction (return-type header and braces were
 * dropped by the extraction).  Kept byte-identical.
 */
524 BitmapHeapRecheck(BitmapHeapScanState
*node
, TupleTableSlot
*slot
)
526 ExprContext
*econtext
;
529 * extract necessary information from index scan node
531 econtext
= node
->ss
.ps
.ps_ExprContext
;
533 /* Does the tuple meet the original qual conditions? */
534 econtext
->ecxt_scantuple
= slot
;
535 return ExecQualAndReset(node
->bitmapqualorig
, econtext
);
538 /* ----------------------------------------------------------------
539 * ExecBitmapHeapScan(node)
540 * ----------------------------------------------------------------
/*
 * ExecBitmapHeapScan -- ExecProcNode entry point for this node type.
 *
 * Thin wrapper: downcasts the PlanState and delegates to ExecScan with
 * BitmapHeapNext as the access method and BitmapHeapRecheck as the EPQ
 * recheck method.
 *
 * NOTE(review): line-mangled extraction; braces dropped.  Kept byte-identical.
 */
542 static TupleTableSlot
*
543 ExecBitmapHeapScan(PlanState
*pstate
)
545 BitmapHeapScanState
*node
= castNode(BitmapHeapScanState
, pstate
);
547 return ExecScan(&node
->ss
,
548 (ExecScanAccessMtd
) BitmapHeapNext
,
549 (ExecScanRecheckMtd
) BitmapHeapRecheck
);
552 /* ----------------------------------------------------------------
553 * ExecReScanBitmapHeapScan(node)
554 * ----------------------------------------------------------------
/*
 * ExecReScanBitmapHeapScan -- prepare the node for a rescan.
 *
 * Ends any live TBM iterators (the scan descriptor's rs_tbmiterator and the
 * node's prefetch iterator), rescans the table to release page pins,
 * releases the visibility-map buffer if held, resets all per-scan state
 * (initialized, pvmbuffer, recheck, blockno, prefetch bookkeeping), and
 * rescans the subplan unless chgParam will trigger that on first
 * ExecProcNode.
 *
 * NOTE(review): line-mangled extraction; the return-type header, braces,
 * and the "if (scan)" guards around the scan-descriptor cleanup appear to
 * be missing.  Kept byte-identical; verify against upstream before editing.
 */
557 ExecReScanBitmapHeapScan(BitmapHeapScanState
*node
)
559 PlanState
*outerPlan
= outerPlanState(node
);
561 TableScanDesc scan
= node
->ss
.ss_currentScanDesc
;
566 * End iteration on iterators saved in scan descriptor if they have
567 * not already been cleaned up.
569 if (!tbm_exhausted(&scan
->st
.rs_tbmiterator
))
570 tbm_end_iterate(&scan
->st
.rs_tbmiterator
);
572 /* rescan to release any page pin */
573 table_rescan(node
->ss
.ss_currentScanDesc
, NULL
);
576 /* If we did not already clean up the prefetch iterator, do so now. */
577 if (!tbm_exhausted(&node
->prefetch_iterator
))
578 tbm_end_iterate(&node
->prefetch_iterator
);
580 /* release bitmaps and buffers if any */
583 if (node
->pvmbuffer
!= InvalidBuffer
)
584 ReleaseBuffer(node
->pvmbuffer
);
/* Reset per-scan state so the next BitmapHeapNext call reinitializes. */
586 node
->initialized
= false;
587 node
->pvmbuffer
= InvalidBuffer
;
588 node
->recheck
= true;
589 /* Only used for serial BHS */
590 node
->blockno
= InvalidBlockNumber
;
591 node
->prefetch_blockno
= InvalidBlockNumber
;
592 node
->prefetch_pages
= 0;
593 node
->prefetch_target
= -1;
595 ExecScanReScan(&node
->ss
);
598 * if chgParam of subnode is not null then plan will be re-scanned by
599 * first ExecProcNode.
601 if (outerPlan
->chgParam
== NULL
)
602 ExecReScan(outerPlan
);
605 /* ----------------------------------------------------------------
606 * ExecEndBitmapHeapScan
607 * ----------------------------------------------------------------
/*
 * ExecEndBitmapHeapScan -- shut down the node and release resources.
 *
 * In a parallel worker, accumulates (not memcpy, so rescans with fresh
 * workers don't lose counts) this worker's exact/lossy page stats into its
 * slot of the shared instrumentation array.  Then ends the subplan, ends any
 * live TBM iterators, ends the table scan, and releases the visibility-map
 * buffer if held.
 *
 * NOTE(review): line-mangled extraction; the return-type header, braces,
 * and the "if (scanDesc)" guard around the endscan cleanup appear to be
 * missing.  Kept byte-identical; verify against upstream before editing.
 */
610 ExecEndBitmapHeapScan(BitmapHeapScanState
*node
)
612 TableScanDesc scanDesc
;
615 * When ending a parallel worker, copy the statistics gathered by the
616 * worker back into shared memory so that it can be picked up by the main
617 * process to report in EXPLAIN ANALYZE.
619 if (node
->sinstrument
!= NULL
&& IsParallelWorker())
621 BitmapHeapScanInstrumentation
*si
;
623 Assert(ParallelWorkerNumber
<= node
->sinstrument
->num_workers
);
624 si
= &node
->sinstrument
->sinstrument
[ParallelWorkerNumber
];
627 * Here we accumulate the stats rather than performing memcpy on
628 * node->stats into si. When a Gather/GatherMerge node finishes it
629 * will perform planner shutdown on the workers. On rescan it will
630 * spin up new workers which will have a new BitmapHeapScanState and
633 si
->exact_pages
+= node
->stats
.exact_pages
;
634 si
->lossy_pages
+= node
->stats
.lossy_pages
;
638 * extract information from the node
640 scanDesc
= node
->ss
.ss_currentScanDesc
;
643 * close down subplans
645 ExecEndNode(outerPlanState(node
));
650 * End iteration on iterators saved in scan descriptor if they have
651 * not already been cleaned up.
653 if (!tbm_exhausted(&scanDesc
->st
.rs_tbmiterator
))
654 tbm_end_iterate(&scanDesc
->st
.rs_tbmiterator
);
659 table_endscan(scanDesc
);
662 /* If we did not already clean up the prefetch iterator, do so now. */
663 if (!tbm_exhausted(&node
->prefetch_iterator
))
664 tbm_end_iterate(&node
->prefetch_iterator
);
667 * release bitmaps and buffers if any
671 if (node
->pvmbuffer
!= InvalidBuffer
)
672 ReleaseBuffer(node
->pvmbuffer
);
675 /* ----------------------------------------------------------------
676 * ExecInitBitmapHeapScan
678 * Initializes the scan's state information.
679 * ----------------------------------------------------------------
/*
 * ExecInitBitmapHeapScan -- build and initialize a BitmapHeapScanState.
 *
 * Rejects EXEC_FLAG_BACKWARD/EXEC_FLAG_MARK, asserts an MVCC snapshot (see
 * the file-header comment on why non-MVCC snapshots are unsafe here),
 * creates the state node, zeroes stats and prefetch bookkeeping, sets up
 * the expression context, opens the scan relation, initializes the child
 * (bitmap-building) subplan, the scan tuple slot, result type/projection,
 * quals (plan qual and bitmapqualorig), and caps prefetching at the
 * tablespace's io_concurrency setting.
 *
 * NOTE(review): line-mangled extraction; braces and the trailing
 * "return scanstate;" appear to be missing.  Kept byte-identical; verify
 * against upstream before editing.
 */
681 BitmapHeapScanState
*
682 ExecInitBitmapHeapScan(BitmapHeapScan
*node
, EState
*estate
, int eflags
)
684 BitmapHeapScanState
*scanstate
;
685 Relation currentRelation
;
687 /* check for unsupported flags */
688 Assert(!(eflags
& (EXEC_FLAG_BACKWARD
| EXEC_FLAG_MARK
)));
691 * Assert caller didn't ask for an unsafe snapshot --- see comments at
694 Assert(IsMVCCSnapshot(estate
->es_snapshot
));
697 * create state structure
699 scanstate
= makeNode(BitmapHeapScanState
);
700 scanstate
->ss
.ps
.plan
= (Plan
*) node
;
701 scanstate
->ss
.ps
.state
= estate
;
702 scanstate
->ss
.ps
.ExecProcNode
= ExecBitmapHeapScan
;
704 scanstate
->tbm
= NULL
;
705 scanstate
->pvmbuffer
= InvalidBuffer
;
707 /* Zero the statistics counters */
708 memset(&scanstate
->stats
, 0, sizeof(BitmapHeapScanInstrumentation
));
/* Prefetch/scan bookkeeping starts in its "not yet started" state. */
710 scanstate
->prefetch_pages
= 0;
711 scanstate
->prefetch_target
= -1;
712 scanstate
->initialized
= false;
713 scanstate
->pstate
= NULL
;
714 scanstate
->recheck
= true;
715 scanstate
->blockno
= InvalidBlockNumber
;
716 scanstate
->prefetch_blockno
= InvalidBlockNumber
;
719 * Miscellaneous initialization
721 * create expression context for node
723 ExecAssignExprContext(estate
, &scanstate
->ss
.ps
);
726 * open the scan relation
728 currentRelation
= ExecOpenScanRelation(estate
, node
->scan
.scanrelid
, eflags
);
731 * initialize child nodes
733 outerPlanState(scanstate
) = ExecInitNode(outerPlan(node
), estate
, eflags
);
736 * get the scan type from the relation descriptor.
738 ExecInitScanTupleSlot(estate
, &scanstate
->ss
,
739 RelationGetDescr(currentRelation
),
740 table_slot_callbacks(currentRelation
));
743 * Initialize result type and projection.
745 ExecInitResultTypeTL(&scanstate
->ss
.ps
);
746 ExecAssignScanProjectionInfo(&scanstate
->ss
);
749 * initialize child expressions
751 scanstate
->ss
.ps
.qual
=
752 ExecInitQual(node
->scan
.plan
.qual
, (PlanState
*) scanstate
);
753 scanstate
->bitmapqualorig
=
754 ExecInitQual(node
->bitmapqualorig
, (PlanState
*) scanstate
);
757 * Maximum number of prefetches for the tablespace if configured,
758 * otherwise the current value of the effective_io_concurrency GUC.
760 scanstate
->prefetch_maximum
=
761 get_tablespace_io_concurrency(currentRelation
->rd_rel
->reltablespace
);
763 scanstate
->ss
.ss_currentRelation
= currentRelation
;
772 * BitmapShouldInitializeSharedState
774 * The first process to come here and see the state to the BM_INITIAL
775 * will become the leader for the parallel bitmap scan and will be
776 * responsible for populating the TIDBitmap. The other processes will
777 * be blocked by the condition variable until the leader wakes them up.
/*
 * BitmapShouldInitializeSharedState -- elect a leader for the parallel scan.
 *
 * Under the mutex, reads pstate->state and, if it is BM_INITIAL, claims
 * leadership by setting it to BM_INPROGRESS.  Non-leaders observing
 * BM_INPROGRESS sleep on the condition variable until the leader broadcasts
 * (see BitmapDoneInitializingSharedState).  Returns true only for the
 * process that saw BM_INITIAL, i.e. the elected leader.
 *
 * NOTE(review): line-mangled extraction; the "static bool" header, braces,
 * and the retry loop structure around the sleep appear to be missing.  Kept
 * byte-identical; verify against upstream before editing.
 */
781 BitmapShouldInitializeSharedState(ParallelBitmapHeapState
*pstate
)
783 SharedBitmapState state
;
787 SpinLockAcquire(&pstate
->mutex
);
788 state
= pstate
->state
;
789 if (pstate
->state
== BM_INITIAL
)
790 pstate
->state
= BM_INPROGRESS
;
791 SpinLockRelease(&pstate
->mutex
);
793 /* Exit if bitmap is done, or if we're the leader. */
794 if (state
!= BM_INPROGRESS
)
797 /* Wait for the leader to wake us up. */
798 ConditionVariableSleep(&pstate
->cv
, WAIT_EVENT_PARALLEL_BITMAP_SCAN
);
801 ConditionVariableCancelSleep();
803 return (state
== BM_INITIAL
);
806 /* ----------------------------------------------------------------
807 * ExecBitmapHeapEstimate
809 * Compute the amount of space we'll need in the parallel
810 * query DSM, and inform pcxt->estimator about our needs.
811 * ----------------------------------------------------------------
/*
 * ExecBitmapHeapEstimate -- reserve DSM space for the parallel scan.
 *
 * Estimates MAXALIGN(sizeof(ParallelBitmapHeapState)), plus — when
 * instrumentation is enabled and workers are planned — the
 * SharedBitmapHeapInstrumentation header and one BitmapHeapScanInstrumentation
 * per worker, then registers the chunk and one TOC key with the estimator.
 *
 * NOTE(review): line-mangled extraction; the return-type header, braces and
 * the "size" declaration line are missing.  Kept byte-identical.
 */
814 ExecBitmapHeapEstimate(BitmapHeapScanState
*node
,
815 ParallelContext
*pcxt
)
819 size
= MAXALIGN(sizeof(ParallelBitmapHeapState
));
821 /* account for instrumentation, if required */
822 if (node
->ss
.ps
.instrument
&& pcxt
->nworkers
> 0)
824 size
= add_size(size
, offsetof(SharedBitmapHeapInstrumentation
, sinstrument
));
825 size
= add_size(size
, mul_size(pcxt
->nworkers
, sizeof(BitmapHeapScanInstrumentation
)));
828 shm_toc_estimate_chunk(&pcxt
->estimator
, size
);
829 shm_toc_estimate_keys(&pcxt
->estimator
, 1);
832 /* ----------------------------------------------------------------
833 * ExecBitmapHeapInitializeDSM
835 * Set up a parallel bitmap heap scan descriptor.
836 * ----------------------------------------------------------------
/*
 * ExecBitmapHeapInitializeDSM -- allocate and populate shared scan state.
 *
 * Allocates one TOC chunk holding a ParallelBitmapHeapState followed (when
 * instrumentation is enabled and workers are planned) by a
 * SharedBitmapHeapInstrumentation array, initializes the iterators, mutex,
 * prefetch counters, BM_INITIAL state and condition variable, zeroes the
 * per-worker instrumentation slots, inserts the chunk into the TOC keyed by
 * plan_node_id, and stores the pointers back into the node.
 *
 * NOTE(review): line-mangled extraction; braces, local declarations for
 * "size"/"ptr", the early return when dsa is NULL, and the "if (sinstrument)"
 * guard around num_workers/memset are missing.  Kept byte-identical; verify
 * against upstream before editing.
 */
839 ExecBitmapHeapInitializeDSM(BitmapHeapScanState
*node
,
840 ParallelContext
*pcxt
)
842 ParallelBitmapHeapState
*pstate
;
843 SharedBitmapHeapInstrumentation
*sinstrument
= NULL
;
844 dsa_area
*dsa
= node
->ss
.ps
.state
->es_query_dsa
;
848 /* If there's no DSA, there are no workers; initialize nothing. */
852 size
= MAXALIGN(sizeof(ParallelBitmapHeapState
));
853 if (node
->ss
.ps
.instrument
&& pcxt
->nworkers
> 0)
855 size
= add_size(size
, offsetof(SharedBitmapHeapInstrumentation
, sinstrument
));
856 size
= add_size(size
, mul_size(pcxt
->nworkers
, sizeof(BitmapHeapScanInstrumentation
)));
859 ptr
= shm_toc_allocate(pcxt
->toc
, size
);
860 pstate
= (ParallelBitmapHeapState
*) ptr
;
861 ptr
+= MAXALIGN(sizeof(ParallelBitmapHeapState
));
862 if (node
->ss
.ps
.instrument
&& pcxt
->nworkers
> 0)
863 sinstrument
= (SharedBitmapHeapInstrumentation
*) ptr
;
865 pstate
->tbmiterator
= 0;
866 pstate
->prefetch_iterator
= 0;
868 /* Initialize the mutex */
869 SpinLockInit(&pstate
->mutex
);
870 pstate
->prefetch_pages
= 0;
871 pstate
->prefetch_target
= -1;
872 pstate
->state
= BM_INITIAL
;
874 ConditionVariableInit(&pstate
->cv
);
878 sinstrument
->num_workers
= pcxt
->nworkers
;
880 /* ensure any unfilled slots will contain zeroes */
881 memset(sinstrument
->sinstrument
, 0,
882 pcxt
->nworkers
* sizeof(BitmapHeapScanInstrumentation
));
885 shm_toc_insert(pcxt
->toc
, node
->ss
.ps
.plan
->plan_node_id
, pstate
);
886 node
->pstate
= pstate
;
887 node
->sinstrument
= sinstrument
;
890 /* ----------------------------------------------------------------
891 * ExecBitmapHeapReInitializeDSM
893 * Reset shared state before beginning a fresh scan.
894 * ----------------------------------------------------------------
/*
 * ExecBitmapHeapReInitializeDSM -- reset shared state for a fresh scan.
 *
 * Restores pstate to BM_INITIAL with zeroed prefetch counters, frees any
 * shared iterator state left in the DSA from the previous scan, and clears
 * both dsa_pointers to InvalidDsaPointer.
 *
 * NOTE(review): line-mangled extraction; the return-type header, braces,
 * and the early return when dsa is NULL are missing.  Kept byte-identical.
 */
897 ExecBitmapHeapReInitializeDSM(BitmapHeapScanState
*node
,
898 ParallelContext
*pcxt
)
900 ParallelBitmapHeapState
*pstate
= node
->pstate
;
901 dsa_area
*dsa
= node
->ss
.ps
.state
->es_query_dsa
;
903 /* If there's no DSA, there are no workers; do nothing. */
907 pstate
->state
= BM_INITIAL
;
908 pstate
->prefetch_pages
= 0;
909 pstate
->prefetch_target
= -1;
911 if (DsaPointerIsValid(pstate
->tbmiterator
))
912 tbm_free_shared_area(dsa
, pstate
->tbmiterator
);
914 if (DsaPointerIsValid(pstate
->prefetch_iterator
))
915 tbm_free_shared_area(dsa
, pstate
->prefetch_iterator
);
917 pstate
->tbmiterator
= InvalidDsaPointer
;
918 pstate
->prefetch_iterator
= InvalidDsaPointer
;
921 /* ----------------------------------------------------------------
922 * ExecBitmapHeapInitializeWorker
924 * Copy relevant information from TOC into planstate.
925 * ----------------------------------------------------------------
/*
 * ExecBitmapHeapInitializeWorker -- attach a worker to the shared state.
 *
 * Looks up this node's chunk in the TOC by plan_node_id, points
 * node->pstate at the ParallelBitmapHeapState at the start of the chunk,
 * and — when instrumentation is on — points node->sinstrument at the
 * SharedBitmapHeapInstrumentation that follows it.
 *
 * NOTE(review): line-mangled extraction; the return-type header, braces,
 * and the "char *ptr" declaration are missing.  Kept byte-identical.
 */
928 ExecBitmapHeapInitializeWorker(BitmapHeapScanState
*node
,
929 ParallelWorkerContext
*pwcxt
)
933 Assert(node
->ss
.ps
.state
->es_query_dsa
!= NULL
);
935 ptr
= shm_toc_lookup(pwcxt
->toc
, node
->ss
.ps
.plan
->plan_node_id
, false);
937 node
->pstate
= (ParallelBitmapHeapState
*) ptr
;
938 ptr
+= MAXALIGN(sizeof(ParallelBitmapHeapState
));
940 if (node
->ss
.ps
.instrument
)
941 node
->sinstrument
= (SharedBitmapHeapInstrumentation
*) ptr
;
944 /* ----------------------------------------------------------------
945 * ExecBitmapHeapRetrieveInstrumentation
947 * Transfer bitmap heap scan statistics from DSM to private memory.
948 * ----------------------------------------------------------------
/*
 * ExecBitmapHeapRetrieveInstrumentation -- copy shared stats to backend
 * memory.
 *
 * If shared instrumentation exists, computes its total size from
 * num_workers, palloc's a private copy and memcpy's the DSM contents into
 * it, repointing node->sinstrument at backend-local memory so it survives
 * DSM detach.  No-op when sinstrument is NULL.
 *
 * NOTE(review): line-mangled extraction; the return-type header, braces,
 * and the "size" declaration are missing, and the function's closing brace
 * lies beyond the visible chunk.  Kept byte-identical.
 */
951 ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState
*node
)
953 SharedBitmapHeapInstrumentation
*sinstrument
= node
->sinstrument
;
956 if (sinstrument
== NULL
)
959 size
= offsetof(SharedBitmapHeapInstrumentation
, sinstrument
)
960 + sinstrument
->num_workers
* sizeof(BitmapHeapScanInstrumentation
);
962 node
->sinstrument
= palloc(size
);
963 memcpy(node
->sinstrument
, sinstrument
, size
);