nbtree: fix read page recheck typo.
[pgsql.git] / src / backend / executor / nodeBitmapHeapscan.c
blob3c63bdd93dff394915abc97e581923c18896927e
/*-------------------------------------------------------------------------
 *
 * nodeBitmapHeapscan.c
 *	  Routines to support bitmapped scans of relations
 *
 * NOTE: it is critical that this plan type only be used with MVCC-compliant
 * snapshots (i.e., regular snapshots, not SnapshotAny or one of the other
 * special snapshots).  The reason is that since index and heap scans are
 * decoupled, there can be no assurance that the index tuple prompting a
 * visit to a particular heap TID still exists when the visit is made.
 * Therefore the tuple might not exist anymore either (which is OK because
 * heap_fetch will cope) --- but worse, the tuple slot could have been
 * re-used for a newer tuple.  With an MVCC snapshot the newer tuple is
 * certain to fail the time qual and so it will not be mistakenly returned,
 * but with anything else we might return a tuple that doesn't meet the
 * required index qual conditions.
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeBitmapHeapscan.c
 *
 *-------------------------------------------------------------------------
 */
/*
 * INTERFACE ROUTINES
 *		ExecBitmapHeapScan			scans a relation using bitmap info
 *		BitmapHeapNext				workhorse for above
 *		ExecInitBitmapHeapScan		creates and initializes state info.
 *		ExecReScanBitmapHeapScan	prepares to rescan the plan.
 *		ExecEndBitmapHeapScan		releases all storage.
 */
36 #include "postgres.h"
38 #include <math.h>
40 #include "access/relscan.h"
41 #include "access/tableam.h"
42 #include "access/visibilitymap.h"
43 #include "executor/executor.h"
44 #include "executor/nodeBitmapHeapscan.h"
45 #include "miscadmin.h"
46 #include "pgstat.h"
47 #include "storage/bufmgr.h"
48 #include "utils/rel.h"
49 #include "utils/snapmgr.h"
50 #include "utils/spccache.h"
52 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
53 static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
54 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
55 BlockNumber blockno);
56 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
57 static inline void BitmapPrefetch(BitmapHeapScanState *node,
58 TableScanDesc scan);
59 static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
62 /* ----------------------------------------------------------------
63 * BitmapHeapNext
65 * Retrieve next tuple from the BitmapHeapScan node's currentRelation
66 * ----------------------------------------------------------------
68 static TupleTableSlot *
69 BitmapHeapNext(BitmapHeapScanState *node)
71 ExprContext *econtext;
72 TableScanDesc scan;
73 TIDBitmap *tbm;
74 TBMIterator *tbmiterator = NULL;
75 TBMSharedIterator *shared_tbmiterator = NULL;
76 TBMIterateResult *tbmres;
77 TupleTableSlot *slot;
78 ParallelBitmapHeapState *pstate = node->pstate;
79 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
82 * extract necessary information from index scan node
84 econtext = node->ss.ps.ps_ExprContext;
85 slot = node->ss.ss_ScanTupleSlot;
86 scan = node->ss.ss_currentScanDesc;
87 tbm = node->tbm;
88 if (pstate == NULL)
89 tbmiterator = node->tbmiterator;
90 else
91 shared_tbmiterator = node->shared_tbmiterator;
92 tbmres = node->tbmres;
95 * If we haven't yet performed the underlying index scan, do it, and begin
96 * the iteration over the bitmap.
98 * For prefetching, we use *two* iterators, one for the pages we are
99 * actually scanning and another that runs ahead of the first for
100 * prefetching. node->prefetch_pages tracks exactly how many pages ahead
101 * the prefetch iterator is. Also, node->prefetch_target tracks the
102 * desired prefetch distance, which starts small and increases up to the
103 * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
104 * a scan that stops after a few tuples because of a LIMIT.
106 if (!node->initialized)
108 if (!pstate)
110 tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
112 if (!tbm || !IsA(tbm, TIDBitmap))
113 elog(ERROR, "unrecognized result from subplan");
115 node->tbm = tbm;
116 node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
117 node->tbmres = tbmres = NULL;
119 #ifdef USE_PREFETCH
120 if (node->prefetch_maximum > 0)
122 node->prefetch_iterator = tbm_begin_iterate(tbm);
123 node->prefetch_pages = 0;
124 node->prefetch_target = -1;
126 #endif /* USE_PREFETCH */
128 else
131 * The leader will immediately come out of the function, but
132 * others will be blocked until leader populates the TBM and wakes
133 * them up.
135 if (BitmapShouldInitializeSharedState(pstate))
137 tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
138 if (!tbm || !IsA(tbm, TIDBitmap))
139 elog(ERROR, "unrecognized result from subplan");
141 node->tbm = tbm;
144 * Prepare to iterate over the TBM. This will return the
145 * dsa_pointer of the iterator state which will be used by
146 * multiple processes to iterate jointly.
148 pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
149 #ifdef USE_PREFETCH
150 if (node->prefetch_maximum > 0)
152 pstate->prefetch_iterator =
153 tbm_prepare_shared_iterate(tbm);
156 * We don't need the mutex here as we haven't yet woke up
157 * others.
159 pstate->prefetch_pages = 0;
160 pstate->prefetch_target = -1;
162 #endif
164 /* We have initialized the shared state so wake up others. */
165 BitmapDoneInitializingSharedState(pstate);
168 /* Allocate a private iterator and attach the shared state to it */
169 node->shared_tbmiterator = shared_tbmiterator =
170 tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
171 node->tbmres = tbmres = NULL;
173 #ifdef USE_PREFETCH
174 if (node->prefetch_maximum > 0)
176 node->shared_prefetch_iterator =
177 tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
179 #endif /* USE_PREFETCH */
183 * If this is the first scan of the underlying table, create the table
184 * scan descriptor and begin the scan.
186 if (!scan)
188 bool need_tuples = false;
191 * We can potentially skip fetching heap pages if we do not need
192 * any columns of the table, either for checking non-indexable
193 * quals or for returning data. This test is a bit simplistic, as
194 * it checks the stronger condition that there's no qual or return
195 * tlist at all. But in most cases it's probably not worth working
196 * harder than that.
198 need_tuples = (node->ss.ps.plan->qual != NIL ||
199 node->ss.ps.plan->targetlist != NIL);
201 scan = table_beginscan_bm(node->ss.ss_currentRelation,
202 node->ss.ps.state->es_snapshot,
204 NULL,
205 need_tuples);
207 node->ss.ss_currentScanDesc = scan;
210 node->initialized = true;
213 for (;;)
215 bool valid_block;
217 CHECK_FOR_INTERRUPTS();
220 * Get next page of results if needed
222 if (tbmres == NULL)
224 if (!pstate)
225 node->tbmres = tbmres = tbm_iterate(tbmiterator);
226 else
227 node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
228 if (tbmres == NULL)
230 /* no more entries in the bitmap */
231 break;
234 BitmapAdjustPrefetchIterator(node, tbmres->blockno);
236 valid_block = table_scan_bitmap_next_block(scan, tbmres);
238 if (tbmres->ntuples >= 0)
239 node->stats.exact_pages++;
240 else
241 node->stats.lossy_pages++;
243 if (!valid_block)
245 /* AM doesn't think this block is valid, skip */
246 continue;
249 /* Adjust the prefetch target */
250 BitmapAdjustPrefetchTarget(node);
252 else
255 * Continuing in previously obtained page.
258 #ifdef USE_PREFETCH
261 * Try to prefetch at least a few pages even before we get to the
262 * second page if we don't stop reading after the first tuple.
264 if (!pstate)
266 if (node->prefetch_target < node->prefetch_maximum)
267 node->prefetch_target++;
269 else if (pstate->prefetch_target < node->prefetch_maximum)
271 /* take spinlock while updating shared state */
272 SpinLockAcquire(&pstate->mutex);
273 if (pstate->prefetch_target < node->prefetch_maximum)
274 pstate->prefetch_target++;
275 SpinLockRelease(&pstate->mutex);
277 #endif /* USE_PREFETCH */
281 * We issue prefetch requests *after* fetching the current page to try
282 * to avoid having prefetching interfere with the main I/O. Also, this
283 * should happen only when we have determined there is still something
284 * to do on the current page, else we may uselessly prefetch the same
285 * page we are just about to request for real.
287 BitmapPrefetch(node, scan);
290 * Attempt to fetch tuple from AM.
292 if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
294 /* nothing more to look at on this page */
295 node->tbmres = tbmres = NULL;
296 continue;
300 * If we are using lossy info, we have to recheck the qual conditions
301 * at every tuple.
303 if (tbmres->recheck)
305 econtext->ecxt_scantuple = slot;
306 if (!ExecQualAndReset(node->bitmapqualorig, econtext))
308 /* Fails recheck, so drop it and loop back for another */
309 InstrCountFiltered2(node, 1);
310 ExecClearTuple(slot);
311 continue;
315 /* OK to return this tuple */
316 return slot;
320 * if we get here it means we are at the end of the scan..
322 return ExecClearTuple(slot);
326 * BitmapDoneInitializingSharedState - Shared state is initialized
328 * By this time the leader has already populated the TBM and initialized the
329 * shared state so wake up other processes.
331 static inline void
332 BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
334 SpinLockAcquire(&pstate->mutex);
335 pstate->state = BM_FINISHED;
336 SpinLockRelease(&pstate->mutex);
337 ConditionVariableBroadcast(&pstate->cv);
341 * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
343 static inline void
344 BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
345 BlockNumber blockno)
347 #ifdef USE_PREFETCH
348 ParallelBitmapHeapState *pstate = node->pstate;
350 if (pstate == NULL)
352 TBMIterator *prefetch_iterator = node->prefetch_iterator;
354 if (node->prefetch_pages > 0)
356 /* The main iterator has closed the distance by one page */
357 node->prefetch_pages--;
359 else if (prefetch_iterator)
361 /* Do not let the prefetch iterator get behind the main one */
362 TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
364 if (tbmpre == NULL || tbmpre->blockno != blockno)
365 elog(ERROR, "prefetch and main iterators are out of sync");
367 return;
370 if (node->prefetch_maximum > 0)
372 TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
374 SpinLockAcquire(&pstate->mutex);
375 if (pstate->prefetch_pages > 0)
377 pstate->prefetch_pages--;
378 SpinLockRelease(&pstate->mutex);
380 else
382 /* Release the mutex before iterating */
383 SpinLockRelease(&pstate->mutex);
386 * In case of shared mode, we can not ensure that the current
387 * blockno of the main iterator and that of the prefetch iterator
388 * are same. It's possible that whatever blockno we are
389 * prefetching will be processed by another process. Therefore,
390 * we don't validate the blockno here as we do in non-parallel
391 * case.
393 if (prefetch_iterator)
394 tbm_shared_iterate(prefetch_iterator);
397 #endif /* USE_PREFETCH */
401 * BitmapAdjustPrefetchTarget - Adjust the prefetch target
403 * Increase prefetch target if it's not yet at the max. Note that
404 * we will increase it to zero after fetching the very first
405 * page/tuple, then to one after the second tuple is fetched, then
406 * it doubles as later pages are fetched.
408 static inline void
409 BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
411 #ifdef USE_PREFETCH
412 ParallelBitmapHeapState *pstate = node->pstate;
414 if (pstate == NULL)
416 if (node->prefetch_target >= node->prefetch_maximum)
417 /* don't increase any further */ ;
418 else if (node->prefetch_target >= node->prefetch_maximum / 2)
419 node->prefetch_target = node->prefetch_maximum;
420 else if (node->prefetch_target > 0)
421 node->prefetch_target *= 2;
422 else
423 node->prefetch_target++;
424 return;
427 /* Do an unlocked check first to save spinlock acquisitions. */
428 if (pstate->prefetch_target < node->prefetch_maximum)
430 SpinLockAcquire(&pstate->mutex);
431 if (pstate->prefetch_target >= node->prefetch_maximum)
432 /* don't increase any further */ ;
433 else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
434 pstate->prefetch_target = node->prefetch_maximum;
435 else if (pstate->prefetch_target > 0)
436 pstate->prefetch_target *= 2;
437 else
438 pstate->prefetch_target++;
439 SpinLockRelease(&pstate->mutex);
441 #endif /* USE_PREFETCH */
445 * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
447 static inline void
448 BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
450 #ifdef USE_PREFETCH
451 ParallelBitmapHeapState *pstate = node->pstate;
453 if (pstate == NULL)
455 TBMIterator *prefetch_iterator = node->prefetch_iterator;
457 if (prefetch_iterator)
459 while (node->prefetch_pages < node->prefetch_target)
461 TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
462 bool skip_fetch;
464 if (tbmpre == NULL)
466 /* No more pages to prefetch */
467 tbm_end_iterate(prefetch_iterator);
468 node->prefetch_iterator = NULL;
469 break;
471 node->prefetch_pages++;
474 * If we expect not to have to actually read this heap page,
475 * skip this prefetch call, but continue to run the prefetch
476 * logic normally. (Would it be better not to increment
477 * prefetch_pages?)
479 skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
480 !tbmpre->recheck &&
481 VM_ALL_VISIBLE(node->ss.ss_currentRelation,
482 tbmpre->blockno,
483 &node->pvmbuffer));
485 if (!skip_fetch)
486 PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
490 return;
493 if (pstate->prefetch_pages < pstate->prefetch_target)
495 TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
497 if (prefetch_iterator)
499 while (1)
501 TBMIterateResult *tbmpre;
502 bool do_prefetch = false;
503 bool skip_fetch;
506 * Recheck under the mutex. If some other process has already
507 * done enough prefetching then we need not to do anything.
509 SpinLockAcquire(&pstate->mutex);
510 if (pstate->prefetch_pages < pstate->prefetch_target)
512 pstate->prefetch_pages++;
513 do_prefetch = true;
515 SpinLockRelease(&pstate->mutex);
517 if (!do_prefetch)
518 return;
520 tbmpre = tbm_shared_iterate(prefetch_iterator);
521 if (tbmpre == NULL)
523 /* No more pages to prefetch */
524 tbm_end_shared_iterate(prefetch_iterator);
525 node->shared_prefetch_iterator = NULL;
526 break;
529 /* As above, skip prefetch if we expect not to need page */
530 skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
531 !tbmpre->recheck &&
532 VM_ALL_VISIBLE(node->ss.ss_currentRelation,
533 tbmpre->blockno,
534 &node->pvmbuffer));
536 if (!skip_fetch)
537 PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
541 #endif /* USE_PREFETCH */
545 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
547 static bool
548 BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
550 ExprContext *econtext;
553 * extract necessary information from index scan node
555 econtext = node->ss.ps.ps_ExprContext;
557 /* Does the tuple meet the original qual conditions? */
558 econtext->ecxt_scantuple = slot;
559 return ExecQualAndReset(node->bitmapqualorig, econtext);
562 /* ----------------------------------------------------------------
563 * ExecBitmapHeapScan(node)
564 * ----------------------------------------------------------------
566 static TupleTableSlot *
567 ExecBitmapHeapScan(PlanState *pstate)
569 BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
571 return ExecScan(&node->ss,
572 (ExecScanAccessMtd) BitmapHeapNext,
573 (ExecScanRecheckMtd) BitmapHeapRecheck);
576 /* ----------------------------------------------------------------
577 * ExecReScanBitmapHeapScan(node)
578 * ----------------------------------------------------------------
580 void
581 ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
583 PlanState *outerPlan = outerPlanState(node);
585 /* rescan to release any page pin */
586 if (node->ss.ss_currentScanDesc)
587 table_rescan(node->ss.ss_currentScanDesc, NULL);
589 /* release bitmaps and buffers if any */
590 if (node->tbmiterator)
591 tbm_end_iterate(node->tbmiterator);
592 if (node->prefetch_iterator)
593 tbm_end_iterate(node->prefetch_iterator);
594 if (node->shared_tbmiterator)
595 tbm_end_shared_iterate(node->shared_tbmiterator);
596 if (node->shared_prefetch_iterator)
597 tbm_end_shared_iterate(node->shared_prefetch_iterator);
598 if (node->tbm)
599 tbm_free(node->tbm);
600 if (node->pvmbuffer != InvalidBuffer)
601 ReleaseBuffer(node->pvmbuffer);
602 node->tbm = NULL;
603 node->tbmiterator = NULL;
604 node->tbmres = NULL;
605 node->prefetch_iterator = NULL;
606 node->initialized = false;
607 node->shared_tbmiterator = NULL;
608 node->shared_prefetch_iterator = NULL;
609 node->pvmbuffer = InvalidBuffer;
611 ExecScanReScan(&node->ss);
614 * if chgParam of subnode is not null then plan will be re-scanned by
615 * first ExecProcNode.
617 if (outerPlan->chgParam == NULL)
618 ExecReScan(outerPlan);
621 /* ----------------------------------------------------------------
622 * ExecEndBitmapHeapScan
623 * ----------------------------------------------------------------
625 void
626 ExecEndBitmapHeapScan(BitmapHeapScanState *node)
628 TableScanDesc scanDesc;
631 * When ending a parallel worker, copy the statistics gathered by the
632 * worker back into shared memory so that it can be picked up by the main
633 * process to report in EXPLAIN ANALYZE.
635 if (node->sinstrument != NULL && IsParallelWorker())
637 BitmapHeapScanInstrumentation *si;
639 Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
640 si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
643 * Here we accumulate the stats rather than performing memcpy on
644 * node->stats into si. When a Gather/GatherMerge node finishes it
645 * will perform planner shutdown on the workers. On rescan it will
646 * spin up new workers which will have a new BitmapHeapScanState and
647 * zeroed stats.
649 si->exact_pages += node->stats.exact_pages;
650 si->lossy_pages += node->stats.lossy_pages;
654 * extract information from the node
656 scanDesc = node->ss.ss_currentScanDesc;
659 * close down subplans
661 ExecEndNode(outerPlanState(node));
664 * release bitmaps and buffers if any
666 if (node->tbmiterator)
667 tbm_end_iterate(node->tbmiterator);
668 if (node->prefetch_iterator)
669 tbm_end_iterate(node->prefetch_iterator);
670 if (node->tbm)
671 tbm_free(node->tbm);
672 if (node->shared_tbmiterator)
673 tbm_end_shared_iterate(node->shared_tbmiterator);
674 if (node->shared_prefetch_iterator)
675 tbm_end_shared_iterate(node->shared_prefetch_iterator);
676 if (node->pvmbuffer != InvalidBuffer)
677 ReleaseBuffer(node->pvmbuffer);
680 * close heap scan
682 if (scanDesc)
683 table_endscan(scanDesc);
687 /* ----------------------------------------------------------------
688 * ExecInitBitmapHeapScan
690 * Initializes the scan's state information.
691 * ----------------------------------------------------------------
693 BitmapHeapScanState *
694 ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
696 BitmapHeapScanState *scanstate;
697 Relation currentRelation;
699 /* check for unsupported flags */
700 Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
703 * Assert caller didn't ask for an unsafe snapshot --- see comments at
704 * head of file.
706 Assert(IsMVCCSnapshot(estate->es_snapshot));
709 * create state structure
711 scanstate = makeNode(BitmapHeapScanState);
712 scanstate->ss.ps.plan = (Plan *) node;
713 scanstate->ss.ps.state = estate;
714 scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
716 scanstate->tbm = NULL;
717 scanstate->tbmiterator = NULL;
718 scanstate->tbmres = NULL;
719 scanstate->pvmbuffer = InvalidBuffer;
721 /* Zero the statistics counters */
722 memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
724 scanstate->prefetch_iterator = NULL;
725 scanstate->prefetch_pages = 0;
726 scanstate->prefetch_target = 0;
727 scanstate->initialized = false;
728 scanstate->shared_tbmiterator = NULL;
729 scanstate->shared_prefetch_iterator = NULL;
730 scanstate->pstate = NULL;
733 * Miscellaneous initialization
735 * create expression context for node
737 ExecAssignExprContext(estate, &scanstate->ss.ps);
740 * open the scan relation
742 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
745 * initialize child nodes
747 outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
750 * get the scan type from the relation descriptor.
752 ExecInitScanTupleSlot(estate, &scanstate->ss,
753 RelationGetDescr(currentRelation),
754 table_slot_callbacks(currentRelation));
757 * Initialize result type and projection.
759 ExecInitResultTypeTL(&scanstate->ss.ps);
760 ExecAssignScanProjectionInfo(&scanstate->ss);
763 * initialize child expressions
765 scanstate->ss.ps.qual =
766 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
767 scanstate->bitmapqualorig =
768 ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
771 * Maximum number of prefetches for the tablespace if configured,
772 * otherwise the current value of the effective_io_concurrency GUC.
774 scanstate->prefetch_maximum =
775 get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
777 scanstate->ss.ss_currentRelation = currentRelation;
780 * all done.
782 return scanstate;
785 /*----------------
786 * BitmapShouldInitializeSharedState
788 * The first process to come here and see the state to the BM_INITIAL
789 * will become the leader for the parallel bitmap scan and will be
790 * responsible for populating the TIDBitmap. The other processes will
791 * be blocked by the condition variable until the leader wakes them up.
792 * ---------------
794 static bool
795 BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
797 SharedBitmapState state;
799 while (1)
801 SpinLockAcquire(&pstate->mutex);
802 state = pstate->state;
803 if (pstate->state == BM_INITIAL)
804 pstate->state = BM_INPROGRESS;
805 SpinLockRelease(&pstate->mutex);
807 /* Exit if bitmap is done, or if we're the leader. */
808 if (state != BM_INPROGRESS)
809 break;
811 /* Wait for the leader to wake us up. */
812 ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
815 ConditionVariableCancelSleep();
817 return (state == BM_INITIAL);
820 /* ----------------------------------------------------------------
821 * ExecBitmapHeapEstimate
823 * Compute the amount of space we'll need in the parallel
824 * query DSM, and inform pcxt->estimator about our needs.
825 * ----------------------------------------------------------------
827 void
828 ExecBitmapHeapEstimate(BitmapHeapScanState *node,
829 ParallelContext *pcxt)
831 Size size;
833 size = MAXALIGN(sizeof(ParallelBitmapHeapState));
835 /* account for instrumentation, if required */
836 if (node->ss.ps.instrument && pcxt->nworkers > 0)
838 size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
839 size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
842 shm_toc_estimate_chunk(&pcxt->estimator, size);
843 shm_toc_estimate_keys(&pcxt->estimator, 1);
846 /* ----------------------------------------------------------------
847 * ExecBitmapHeapInitializeDSM
849 * Set up a parallel bitmap heap scan descriptor.
850 * ----------------------------------------------------------------
852 void
853 ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
854 ParallelContext *pcxt)
856 ParallelBitmapHeapState *pstate;
857 SharedBitmapHeapInstrumentation *sinstrument = NULL;
858 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
859 char *ptr;
860 Size size;
862 /* If there's no DSA, there are no workers; initialize nothing. */
863 if (dsa == NULL)
864 return;
866 size = MAXALIGN(sizeof(ParallelBitmapHeapState));
867 if (node->ss.ps.instrument && pcxt->nworkers > 0)
869 size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
870 size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
873 ptr = shm_toc_allocate(pcxt->toc, size);
874 pstate = (ParallelBitmapHeapState *) ptr;
875 ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
876 if (node->ss.ps.instrument && pcxt->nworkers > 0)
877 sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
879 pstate->tbmiterator = 0;
880 pstate->prefetch_iterator = 0;
882 /* Initialize the mutex */
883 SpinLockInit(&pstate->mutex);
884 pstate->prefetch_pages = 0;
885 pstate->prefetch_target = 0;
886 pstate->state = BM_INITIAL;
888 ConditionVariableInit(&pstate->cv);
890 if (sinstrument)
892 sinstrument->num_workers = pcxt->nworkers;
894 /* ensure any unfilled slots will contain zeroes */
895 memset(sinstrument->sinstrument, 0,
896 pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation));
899 shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
900 node->pstate = pstate;
901 node->sinstrument = sinstrument;
904 /* ----------------------------------------------------------------
905 * ExecBitmapHeapReInitializeDSM
907 * Reset shared state before beginning a fresh scan.
908 * ----------------------------------------------------------------
910 void
911 ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
912 ParallelContext *pcxt)
914 ParallelBitmapHeapState *pstate = node->pstate;
915 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
917 /* If there's no DSA, there are no workers; do nothing. */
918 if (dsa == NULL)
919 return;
921 pstate->state = BM_INITIAL;
923 if (DsaPointerIsValid(pstate->tbmiterator))
924 tbm_free_shared_area(dsa, pstate->tbmiterator);
926 if (DsaPointerIsValid(pstate->prefetch_iterator))
927 tbm_free_shared_area(dsa, pstate->prefetch_iterator);
929 pstate->tbmiterator = InvalidDsaPointer;
930 pstate->prefetch_iterator = InvalidDsaPointer;
933 /* ----------------------------------------------------------------
934 * ExecBitmapHeapInitializeWorker
936 * Copy relevant information from TOC into planstate.
937 * ----------------------------------------------------------------
939 void
940 ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
941 ParallelWorkerContext *pwcxt)
943 char *ptr;
945 Assert(node->ss.ps.state->es_query_dsa != NULL);
947 ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
949 node->pstate = (ParallelBitmapHeapState *) ptr;
950 ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
952 if (node->ss.ps.instrument)
953 node->sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
956 /* ----------------------------------------------------------------
957 * ExecBitmapHeapRetrieveInstrumentation
959 * Transfer bitmap heap scan statistics from DSM to private memory.
960 * ----------------------------------------------------------------
962 void
963 ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
965 SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
966 Size size;
968 if (sinstrument == NULL)
969 return;
971 size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
972 + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
974 node->sinstrument = palloc(size);
975 memcpy(node->sinstrument, sinstrument, size);