1 /*-------------------------------------------------------------------------
4 * Routines to support direct tid scans of relations
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
13 *-------------------------------------------------------------------------
18 * ExecTidScan scans a relation using tids
19 * ExecInitTidScan creates and initializes state info.
20 * ExecTidReScan rescans the tid relation.
21 * ExecEndTidScan releases all storage.
22 * ExecTidMarkPos marks scan position.
23 * ExecTidRestrPos restores scan position.
27 #include "access/heapam.h"
28 #include "access/sysattr.h"
29 #include "catalog/pg_type.h"
30 #include "executor/execdebug.h"
31 #include "executor/nodeTidscan.h"
32 #include "optimizer/clauses.h"
33 #include "storage/bufmgr.h"
34 #include "utils/array.h"
/*
 * IsCTIDVar(node)
 *
 * Test used to recognize a reference to the CTID system column.  The part
 * of the expansion visible here views the node as a Var and requires that
 * its varattno equal SelfItemPointerAttributeNumber and that its
 * varlevelsup be 0.
 *
 * NOTE(review): the beginning of the macro expansion is missing from this
 * listing; presumably it first checks that the node is a non-NULL Var
 * before the casts below are applied -- confirm against the full file.
 */
37 #define IsCTIDVar(node) \
40 ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
41 ((Var *) (node))->varlevelsup == 0)
/*
 * Forward declarations of the file-local (static) helpers:
 *	TidListCreate		- takes the TidScanState, returns nothing
 *	itemptr_comparator	- two const void pointers in, int out (the
 *						  signature of a qsort comparison callback)
 *	TidNext				- takes the TidScanState, returns a TupleTableSlot
 */
43 static void TidListCreate(TidScanState
*tidstate
);
44 static int itemptr_comparator(const void *a
, const void *b
);
45 static TupleTableSlot
*TidNext(TidScanState
*node
);
49 * Compute the list of TIDs to be visited, by evaluating the expressions
52 * (The result is actually an array, not a list.)
/*
 * TidListCreate
 *
 * Evaluates the expression states in tidstate->tss_tidquals and collects
 * the resulting TIDs into an array, which is stored in
 * tidstate->tss_TidList with its length in tss_NumTids; tss_TidPtr is
 * reset to -1 at the end.
 *
 * Qual shapes handled in the visible code:
 *	- an operator clause (is_opclause) in which one operand is a CTID Var:
 *	  the expression state of the opposite operand is evaluated with
 *	  ExecEvalExprSwitchContext to yield one ItemPointer;
 *	- a ScalarArrayOpExpr: the second argument is evaluated as an array
 *	  datum and split into individual TID datums by deconstruct_array;
 *	- a CurrentOfExpr: execCurrentOf is consulted for the cursor TID, and
 *	  tss_isCurrentOf is set to true when a TID is added.
 * An operator clause with no CTID Var operand, or a qual matching none of
 * the shapes, is reported through elog(ERROR).
 *
 * For the first two shapes a TID is kept only if ItemPointerIsValid holds
 * and its block number is below nblocks, the relation size obtained from
 * RelationGetNumberOfBlocks at the start.  The array is first sized to
 * list_length(evalList) entries and is re-sized when more room is needed.
 *
 * Afterwards the array is sorted with qsort and adjacent equal entries
 * are squeezed out, leaving lastTid + 1 unique TIDs.  The Assert records
 * that this path is not expected for a CURRENT OF scan.
 *
 * NOTE(review): this listing omits a number of original lines, among
 * them the return type, braces, several local declarations, the loop
 * header over evalList, the null checks on evaluated datums, the
 * reallocation calls (presumably repalloc), the condition guarding the
 * sort/de-duplication step (presumably more than one TID) and the
 * comparator argument to qsort.  Confirm those details against the
 * complete file.
 */
55 TidListCreate(TidScanState
*tidstate
)
57 List
*evalList
= tidstate
->tss_tidquals
;
58 ExprContext
*econtext
= tidstate
->ss
.ps
.ps_ExprContext
;
60 ItemPointerData
*tidList
;
66 * We silently discard any TIDs that are out of range at the time of
67 * scan start. (Since we hold at least AccessShareLock on the table,
68 * it won't be possible for someone to truncate away the blocks we
71 nblocks
= RelationGetNumberOfBlocks(tidstate
->ss
.ss_currentRelation
);
74 * We initialize the array with enough slots for the case that all quals
75 * are simple OpExprs or CurrentOfExprs. If there are any
76 * ScalarArrayOpExprs, we may have to enlarge the array.
78 numAllocTids
= list_length(evalList
);
79 tidList
= (ItemPointerData
*)
80 palloc(numAllocTids
* sizeof(ItemPointerData
));
82 tidstate
->tss_isCurrentOf
= false;
86 ExprState
*exstate
= (ExprState
*) lfirst(l
);
87 Expr
*expr
= exstate
->expr
;
91 if (is_opclause(expr
))
93 FuncExprState
*fexstate
= (FuncExprState
*) exstate
;
97 arg1
= get_leftop(expr
);
98 arg2
= get_rightop(expr
);
100 exstate
= (ExprState
*) lsecond(fexstate
->args
);
101 else if (IsCTIDVar(arg2
))
102 exstate
= (ExprState
*) linitial(fexstate
->args
);
104 elog(ERROR
, "could not identify CTID variable");
106 itemptr
= (ItemPointer
)
107 DatumGetPointer(ExecEvalExprSwitchContext(exstate
,
112 ItemPointerIsValid(itemptr
) &&
113 ItemPointerGetBlockNumber(itemptr
) < nblocks
)
115 if (numTids
>= numAllocTids
)
118 tidList
= (ItemPointerData
*)
120 numAllocTids
* sizeof(ItemPointerData
));
122 tidList
[numTids
++] = *itemptr
;
125 else if (expr
&& IsA(expr
, ScalarArrayOpExpr
))
127 ScalarArrayOpExprState
*saexstate
= (ScalarArrayOpExprState
*) exstate
;
129 ArrayType
*itemarray
;
135 exstate
= (ExprState
*) lsecond(saexstate
->fxprstate
.args
);
136 arraydatum
= ExecEvalExprSwitchContext(exstate
,
142 itemarray
= DatumGetArrayTypeP(arraydatum
);
143 deconstruct_array(itemarray
,
144 TIDOID
, SizeOfIptrData
, false, 's',
145 &ipdatums
, &ipnulls
, &ndatums
);
146 if (numTids
+ ndatums
> numAllocTids
)
148 numAllocTids
= numTids
+ ndatums
;
149 tidList
= (ItemPointerData
*)
151 numAllocTids
* sizeof(ItemPointerData
));
153 for (i
= 0; i
< ndatums
; i
++)
157 itemptr
= (ItemPointer
) DatumGetPointer(ipdatums
[i
]);
158 if (ItemPointerIsValid(itemptr
) &&
159 ItemPointerGetBlockNumber(itemptr
) < nblocks
)
160 tidList
[numTids
++] = *itemptr
;
166 else if (expr
&& IsA(expr
, CurrentOfExpr
))
168 CurrentOfExpr
*cexpr
= (CurrentOfExpr
*) expr
;
169 ItemPointerData cursor_tid
;
171 if (execCurrentOf(cexpr
, econtext
,
172 RelationGetRelid(tidstate
->ss
.ss_currentRelation
),
175 if (numTids
>= numAllocTids
)
178 tidList
= (ItemPointerData
*)
180 numAllocTids
* sizeof(ItemPointerData
));
182 tidList
[numTids
++] = cursor_tid
;
183 tidstate
->tss_isCurrentOf
= true;
187 elog(ERROR
, "could not identify CTID expression");
191 * Sort the array of TIDs into order, and eliminate duplicates.
192 * Eliminating duplicates is necessary since we want OR semantics across
193 * the list. Sorting makes it easier to detect duplicates, and as a bonus
194 * ensures that we will visit the heap in the most efficient way.
201 /* CurrentOfExpr could never appear OR'd with something else */
202 Assert(!tidstate
->tss_isCurrentOf
);
204 qsort((void *) tidList
, numTids
, sizeof(ItemPointerData
),
207 for (i
= 1; i
< numTids
; i
++)
209 if (!ItemPointerEquals(&tidList
[lastTid
], &tidList
[i
]))
210 tidList
[++lastTid
] = tidList
[i
];
212 numTids
= lastTid
+ 1;
215 tidstate
->tss_TidList
= tidList
;
216 tidstate
->tss_NumTids
= numTids
;
217 tidstate
->tss_TidPtr
= -1;
221 * qsort comparator for ItemPointerData items
/*
 * itemptr_comparator
 *
 * Comparison callback over ItemPointerData elements.  Both untyped
 * arguments are viewed as const ItemPointerData pointers, and the block
 * number and offset number of each are extracted into ba/bb and oa/ob.
 *
 * NOTE(review): the statements that compare these values and produce the
 * return value are missing from this listing; presumably the ordering is
 * by block number first and offset number second -- confirm against the
 * complete file.
 */
224 itemptr_comparator(const void *a
, const void *b
)
226 const ItemPointerData
*ipa
= (const ItemPointerData
*) a
;
227 const ItemPointerData
*ipb
= (const ItemPointerData
*) b
;
228 BlockNumber ba
= ItemPointerGetBlockNumber(ipa
);
229 BlockNumber bb
= ItemPointerGetBlockNumber(ipb
);
230 OffsetNumber oa
= ItemPointerGetOffsetNumber(ipa
);
231 OffsetNumber ob
= ItemPointerGetOffsetNumber(ipb
);
244 /* ----------------------------------------------------------------
247 * Retrieve a tuple from the TidScan node's currentRelation
248 * using the tids in the TidScanState information.
250 * ----------------------------------------------------------------
/*
 * TidNext
 *
 * Access routine that produces the next tuple of the TID scan, or an
 * empty slot (ExecClearTuple) once there is nothing more to return.
 *
 * Outline of the visible logic:
 *	1. Pull the executor state, scan direction, snapshot, heap relation,
 *	   scan tuple slot and scanrelid out of the node.
 *	2. PlanQual case: when estate->es_evTuple holds a tuple for this
 *	   relation (index scanrelid - 1), return an empty slot if the
 *	   matching es_evTupleNull flag is already set; otherwise store that
 *	   tuple in the slot with no buffer attached and set the flag so the
 *	   following call reports end of data.
 *	3. When tss_TidList is still NULL the list has not been built yet
 *	   (the statement under that test is not visible here).
 *	4. Start position: an unset tss_TidPtr (negative) becomes
 *	   numTids - 1 for a backward scan or 0 for a forward scan.
 *	5. While tss_TidPtr is inside [0, numTids): copy the TID into
 *	   tuple->t_self; for a CURRENT OF scan first chase it with
 *	   heap_get_latest_tid; then heap_fetch under the snapshot.  On
 *	   success the tuple is stored in the slot together with its buffer
 *	   and the local pin is dropped with ReleaseBuffer.
 *	6. Falling out of the loop clears the slot and returns it.
 *
 * NOTE(review): several original lines are missing from this listing,
 * among them some local declarations, the call made when tss_TidList is
 * NULL (presumably TidListCreate), the stepping of tss_TidPtr between
 * iterations, and the return statements after a tuple is stored.
 * Confirm against the complete file.
 */
252 static TupleTableSlot
*
253 TidNext(TidScanState
*node
)
256 ScanDirection direction
;
258 Relation heapRelation
;
260 TupleTableSlot
*slot
;
262 Buffer buffer
= InvalidBuffer
;
263 ItemPointerData
*tidList
;
268 * extract necessary information from tid scan node
270 estate
= node
->ss
.ps
.state
;
271 direction
= estate
->es_direction
;
272 snapshot
= estate
->es_snapshot
;
273 heapRelation
= node
->ss
.ss_currentRelation
;
274 slot
= node
->ss
.ss_ScanTupleSlot
;
275 scanrelid
= ((TidScan
*) node
->ss
.ps
.plan
)->scan
.scanrelid
;
278 * Check if we are evaluating PlanQual for tuple of this relation.
279 * Additional checking is not good, but no other way for now. We could
280 * introduce new nodes for this case and handle TidScan --> NewNode
281 * switching in Init/ReScan plan...
283 if (estate
->es_evTuple
!= NULL
&&
284 estate
->es_evTuple
[scanrelid
- 1] != NULL
)
286 if (estate
->es_evTupleNull
[scanrelid
- 1])
287 return ExecClearTuple(slot
);
290 * XXX shouldn't we check here to make sure tuple matches TID list? In
291 * runtime-key case this is not certain, is it? However, in the WHERE
292 * CURRENT OF case it might not match anyway ...
295 ExecStoreTuple(estate
->es_evTuple
[scanrelid
- 1],
296 slot
, InvalidBuffer
, false);
298 /* Flag for the next call that no more tuples */
299 estate
->es_evTupleNull
[scanrelid
- 1] = true;
304 * First time through, compute the list of TIDs to be visited
306 if (node
->tss_TidList
== NULL
)
309 tidList
= node
->tss_TidList
;
310 numTids
= node
->tss_NumTids
;
312 tuple
= &(node
->tss_htup
);
315 * Initialize or advance scan position, depending on direction.
317 bBackward
= ScanDirectionIsBackward(direction
);
320 if (node
->tss_TidPtr
< 0)
322 /* initialize for backward scan */
323 node
->tss_TidPtr
= numTids
- 1;
330 if (node
->tss_TidPtr
< 0)
332 /* initialize for forward scan */
333 node
->tss_TidPtr
= 0;
339 while (node
->tss_TidPtr
>= 0 && node
->tss_TidPtr
< numTids
)
341 tuple
->t_self
= tidList
[node
->tss_TidPtr
];
344 * For WHERE CURRENT OF, the tuple retrieved from the cursor might
345 * since have been updated; if so, we should fetch the version that is
346 * current according to our snapshot.
348 if (node
->tss_isCurrentOf
)
349 heap_get_latest_tid(heapRelation
, snapshot
, &tuple
->t_self
);
351 if (heap_fetch(heapRelation
, snapshot
, tuple
, &buffer
, false, NULL
))
354 * store the scanned tuple in the scan tuple slot of the scan
355 * state. Eventually we will only do this and not return a tuple.
356 * Note: we pass 'false' because tuples returned by amgetnext are
357 * pointers onto disk pages and were not created with palloc() and
358 * so should not be pfree()'d.
360 ExecStoreTuple(tuple
, /* tuple to store */
361 slot
, /* slot to store in */
362 buffer
, /* buffer associated with tuple */
363 false); /* don't pfree */
366 * At this point we have an extra pin on the buffer, because
367 * ExecStoreTuple incremented the pin count. Drop our local pin.
369 ReleaseBuffer(buffer
);
373 /* Bad TID or failed snapshot qual; try next */
381 * if we get here it means the tid scan failed so we are at the end of the
384 return ExecClearTuple(slot
);
387 /* ----------------------------------------------------------------
390 * Scans the relation using tids and returns
391 * the next qualifying tuple in the direction specified.
392 * It calls ExecScan() and passes it the access method which returns
393 * the next tuple using the tids.
396 * -- the "cursor" maintained by the AMI is positioned at the tuple
397 * returned previously.
400 * -- the relation indicated is opened for scanning so that the
401 * "cursor" is positioned before the first qualifying tuple.
403 * ----------------------------------------------------------------
/*
 * ExecTidScan
 *
 * Entry point for fetching the next tuple from a TID scan node.  It
 * delegates to ExecScan, handing over the embedded scan state (node->ss)
 * and TidNext, cast to ExecScanAccessMtd, as the access routine; the
 * value ExecScan returns is passed straight back to the caller.
 */
406 ExecTidScan(TidScanState
*node
)
409 * use TidNext as access method
411 return ExecScan(&node
->ss
, (ExecScanAccessMtd
) TidNext
);
414 /* ----------------------------------------------------------------
415 * ExecTidReScan(node)
416 * ----------------------------------------------------------------
/*
 * ExecTidReScan
 *
 * Prepares the node to be scanned again.
 *
 *	node	  - the TID scan state to reset
 *	exprCtxt  - may be NULL; when it is not, its ecxt_outertuple is copied
 *				into the expression context of the node so that
 *				runtime-dependent TID expressions see the new outer tuple
 *
 * Visible steps: clear ps_TupFromTlist; copy the outer tuple if one was
 * supplied; in the PlanQual case (es_evTuple has an entry for this
 * relation) reset the es_evTupleNull flag to false; release any
 * previously built TID array with pfree and mark the list as not computed
 * (tss_TidList = NULL, tss_NumTids = 0, tss_TidPtr = -1).
 *
 * NOTE(review): the lines following the PlanQual flag reset are missing
 * from this listing; presumably that branch returns before the TID array
 * is released -- confirm against the complete file.
 */
419 ExecTidReScan(TidScanState
*node
, ExprContext
*exprCtxt
)
424 estate
= node
->ss
.ps
.state
;
425 scanrelid
= ((TidScan
*) node
->ss
.ps
.plan
)->scan
.scanrelid
;
427 node
->ss
.ps
.ps_TupFromTlist
= false;
429 /* If we are being passed an outer tuple, save it for runtime key calc */
430 if (exprCtxt
!= NULL
)
431 node
->ss
.ps
.ps_ExprContext
->ecxt_outertuple
=
432 exprCtxt
->ecxt_outertuple
;
434 /* If this is re-scanning of PlanQual ... */
435 if (estate
->es_evTuple
!= NULL
&&
436 estate
->es_evTuple
[scanrelid
- 1] != NULL
)
438 estate
->es_evTupleNull
[scanrelid
- 1] = false;
442 if (node
->tss_TidList
)
443 pfree(node
->tss_TidList
);
444 node
->tss_TidList
= NULL
;
445 node
->tss_NumTids
= 0;
446 node
->tss_TidPtr
= -1;
449 /* ----------------------------------------------------------------
452 * Releases any storage allocated through C routines.
454 * ----------------------------------------------------------------
/*
 * ExecEndTidScan
 *
 * Shuts the TID scan node down.  In order, the visible code frees the
 * expression context of the node (ExecFreeExprContext), clears the result
 * tuple slot and the scan tuple slot (ExecClearTuple), and closes the
 * scanned relation (ExecCloseScanRelation).
 */
457 ExecEndTidScan(TidScanState
*node
)
460 * Free the exprcontext
462 ExecFreeExprContext(&node
->ss
.ps
);
465 * clear out tuple table slots
467 ExecClearTuple(node
->ss
.ps
.ps_ResultTupleSlot
);
468 ExecClearTuple(node
->ss
.ss_ScanTupleSlot
);
471 * close the heap relation.
473 ExecCloseScanRelation(node
->ss
.ss_currentRelation
);
476 /* ----------------------------------------------------------------
479 * Marks scan position by marking the current tid.
481 * ----------------------------------------------------------------
/*
 * ExecTidMarkPos
 *
 * Remembers the current scan position by copying the index tss_TidPtr
 * into tss_MarkTidPtr.  Only the index is saved; the TID array itself is
 * not copied.
 */
484 ExecTidMarkPos(TidScanState
*node
)
486 node
->tss_MarkTidPtr
= node
->tss_TidPtr
;
489 /* ----------------------------------------------------------------
492 * Restores scan position by restoring the current tid.
495 * XXX Assumes previously marked scan position belongs to current tid
496 * ----------------------------------------------------------------
/*
 * ExecTidRestrPos
 *
 * Returns the scan to the remembered position by copying tss_MarkTidPtr
 * back into tss_TidPtr.  No check is made that a position was marked
 * beforehand or that the TID array is unchanged since the mark.
 */
499 ExecTidRestrPos(TidScanState
*node
)
501 node
->tss_TidPtr
= node
->tss_MarkTidPtr
;
504 /* ----------------------------------------------------------------
507 * Initializes the tid scan's state information, creates
508 * scan keys, and opens the base and tid relations.
511 * node: TidScan node produced by the planner.
512 * estate: the execution state initialized in InitPlan.
513 * ----------------------------------------------------------------
/*
 * ExecInitTidScan
 *
 * Builds the run-time state (TidScanState) for a TidScan plan node.
 *
 *	node	- the TidScan plan node
 *	estate	- the executor state the new node belongs to
 *	eflags	- not referenced by any line visible in this listing
 *
 * Visible steps, in order:
 *	- allocate the state node and link it to the plan node and to estate;
 *	- create an expression context and clear ps_TupFromTlist;
 *	- initialize expression state for the target list, the ordinary quals
 *	  and the TID quals (stored in tss_tidquals) with ExecInitExpr;
 *	- set up the result and scan tuple slots; TIDSCAN_NSLOTS, defined
 *	  here as 2, is the slot count reported by ExecCountSlotsTidScan;
 *	- mark the TID list as not yet computed (NULL / 0 / -1);
 *	- open the relation named by scanrelid via ExecOpenScanRelation and
 *	  record it, leaving ss_currentScanDesc NULL;
 *	- take the scan tuple type from the relation descriptor, then set up
 *	  the result tuple type and projection info.
 *
 * NOTE(review): the return type and return statement are missing from
 * this listing; presumably the new TidScanState is returned -- confirm
 * against the complete file.
 */
516 ExecInitTidScan(TidScan
*node
, EState
*estate
, int eflags
)
518 TidScanState
*tidstate
;
519 Relation currentRelation
;
522 * create state structure
524 tidstate
= makeNode(TidScanState
);
525 tidstate
->ss
.ps
.plan
= (Plan
*) node
;
526 tidstate
->ss
.ps
.state
= estate
;
529 * Miscellaneous initialization
531 * create expression context for node
533 ExecAssignExprContext(estate
, &tidstate
->ss
.ps
);
535 tidstate
->ss
.ps
.ps_TupFromTlist
= false;
538 * initialize child expressions
540 tidstate
->ss
.ps
.targetlist
= (List
*)
541 ExecInitExpr((Expr
*) node
->scan
.plan
.targetlist
,
542 (PlanState
*) tidstate
);
543 tidstate
->ss
.ps
.qual
= (List
*)
544 ExecInitExpr((Expr
*) node
->scan
.plan
.qual
,
545 (PlanState
*) tidstate
);
547 tidstate
->tss_tidquals
= (List
*)
548 ExecInitExpr((Expr
*) node
->tidquals
,
549 (PlanState
*) tidstate
);
551 #define TIDSCAN_NSLOTS 2
554 * tuple table initialization
556 ExecInitResultTupleSlot(estate
, &tidstate
->ss
.ps
);
557 ExecInitScanTupleSlot(estate
, &tidstate
->ss
);
560 * mark tid list as not computed yet
562 tidstate
->tss_TidList
= NULL
;
563 tidstate
->tss_NumTids
= 0;
564 tidstate
->tss_TidPtr
= -1;
567 * open the base relation and acquire appropriate lock on it.
569 currentRelation
= ExecOpenScanRelation(estate
, node
->scan
.scanrelid
);
571 tidstate
->ss
.ss_currentRelation
= currentRelation
;
572 tidstate
->ss
.ss_currentScanDesc
= NULL
; /* no heap scan here */
575 * get the scan type from the relation descriptor.
577 ExecAssignScanType(&tidstate
->ss
, RelationGetDescr(currentRelation
));
580 * Initialize result tuple type and projection info.
582 ExecAssignResultTypeFromTL(&tidstate
->ss
.ps
);
583 ExecAssignScanProjectionInfo(&tidstate
->ss
);
/*
 * ExecCountSlotsTidScan
 *
 * Reports how many tuple table slots this plan subtree needs: the counts
 * ExecCountSlotsNode gives for the outer and inner subplans of the node,
 * plus TIDSCAN_NSLOTS for the node itself.
 */
592 ExecCountSlotsTidScan(TidScan
*node
)
594 return ExecCountSlotsNode(outerPlan((Plan
*) node
)) +
595 ExecCountSlotsNode(innerPlan((Plan
*) node
)) + TIDSCAN_NSLOTS
;