/*-------------------------------------------------------------------------
 *
 * nodeMemoize.c
 *	  Routines to handle caching of results from parameterized nodes
 *
 * Portions Copyright (c) 2021-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeMemoize.c
 *
 * Memoize nodes are intended to sit above parameterized nodes in the plan
 * tree in order to cache results from them.  The intention here is that a
 * repeat scan with a parameter value that has already been seen by the node
 * can fetch tuples from the cache rather than having to re-scan the inner
 * node all over again.  The query planner may choose to make use of one of
 * these when it thinks rescans for previously seen values are likely enough
 * to warrant adding the additional node.
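 *
 * As an illustration (the table, column and index names here are purely
 * hypothetical), a plan such as:
 *
 *		Nested Loop
 *		  ->  Seq Scan on t1
 *		  ->  Memoize
 *				Cache Key: t1.x
 *				->  Index Scan using t2_x_idx on t2
 *					  Index Cond: (x = t1.x)
 *
 * allows the inner index scan's results to be served from the cache each
 * time a value of t1.x that was seen earlier comes around again.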
 *
 * The method of cache we use is a hash table.  When the cache fills, we never
 * spill tuples to disk, instead, we choose to evict the least recently used
 * cache entry from the cache.  We remember the least recently used entry by
 * always pushing new entries and entries we look for onto the tail of a
 * doubly linked list.  This means that older items always bubble to the top
 * of this LRU list.
 *
 * Sometimes our callers won't run their scans to completion.  For example a
 * semi-join only needs to run until it finds a matching tuple, and once it
 * does, the join operator skips to the next outer tuple and does not execute
 * the inner side again on that scan.  Because of this, we must keep track of
 * when a cache entry is complete, and by default, we know it is when we run
 * out of tuples to read during the scan.  However, there are cases where we
 * can mark the cache entry as complete without exhausting the scan of all
 * tuples.  One case is unique joins, where the join operator knows that there
 * will only be at most one match for any given outer tuple.  In order to
 * support such cases we allow the "singlerow" option to be set for the cache.
 * This option marks the cache entry as complete after we read the first tuple
 * from the subnode.
 *
 * It's possible when we're filling the cache for a given set of parameters
 * that we're unable to free enough memory to store any more tuples.  If this
 * happens then we'll have already evicted all other cache entries.  When
 * caching another tuple would cause us to exceed our memory budget, we must
 * free the entry that we're currently populating and move the state machine
 * into MEMO_CACHE_BYPASS_MODE.  This means that we'll not attempt to cache
 * any further tuples for this particular scan.  We don't have the memory for
 * it.  The state machine will be reset again on the next rescan.  If the
 * memory requirements to cache the next parameter's tuples are less
 * demanding, then that may allow us to start putting useful entries back into
 * the cache again.
 *
 *
 * INTERFACE ROUTINES
 *		ExecMemoize			- lookup cache, exec subplan when not found
 *		ExecInitMemoize		- initialize node and subnodes
 *		ExecEndMemoize		- shutdown node and subnodes
 *		ExecReScanMemoize	- rescan the memoize node
 *
 *		ExecMemoizeEstimate		estimates DSM space needed for parallel plan
 *		ExecMemoizeInitializeDSM	initialize DSM for parallel plan
 *		ExecMemoizeInitializeWorker	attach to DSM info in parallel worker
 *		ExecMemoizeRetrieveInstrumentation	get instrumentation from worker
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "common/hashfn.h"
#include "executor/executor.h"
#include "executor/nodeMemoize.h"
#include "lib/ilist.h"
#include "miscadmin.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"

/* States of the ExecMemoize state machine */
#define MEMO_CACHE_LOOKUP			1	/* Attempt to perform a cache lookup */
#define MEMO_CACHE_FETCH_NEXT_TUPLE	2	/* Get another tuple from the cache */
#define MEMO_FILLING_CACHE			3	/* Read outer node to fill cache */
#define MEMO_CACHE_BYPASS_MODE		4	/* Bypass mode.  Just read from our
										 * subplan without caching anything */
#define MEMO_END_OF_SCAN			5	/* Ready for rescan */
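
/*
 * A rough sketch of the transitions between these states, as implemented by
 * ExecMemoize() below:
 *
 *	MEMO_CACHE_LOOKUP    -> MEMO_CACHE_FETCH_NEXT_TUPLE  (cache hit)
 *	MEMO_CACHE_LOOKUP    -> MEMO_FILLING_CACHE           (cache miss)
 *	MEMO_CACHE_LOOKUP or
 *	MEMO_FILLING_CACHE   -> MEMO_CACHE_BYPASS_MODE       (out of cache memory)
 *	any of the above     -> MEMO_END_OF_SCAN             (subplan or cache
 *	                                                      entry exhausted)
 *	MEMO_END_OF_SCAN     -> MEMO_CACHE_LOOKUP            (on rescan)
 */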

/* Helper macros for memory accounting */
#define EMPTY_ENTRY_MEMORY_BYTES(e)		(sizeof(MemoizeEntry) + \
										 sizeof(MemoizeKey) + \
										 (e)->key->params->t_len)
#define CACHE_TUPLE_BYTES(t)			(sizeof(MemoizeTuple) + \
										 (t)->mintuple->t_len)
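
/*
 * mem_used is adjusted by the amounts above each time a cache entry or a
 * cached tuple is added or removed; see cache_lookup(), cache_store_tuple()
 * and entry_purge_tuples() below.  For example, an entry whose key's
 * MinimalTuple occupies 24 bytes accounts for sizeof(MemoizeEntry) +
 * sizeof(MemoizeKey) + 24 bytes while it holds no cached tuples.
 */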

/*
 * MemoizeTuple
 *		Stores an individually cached tuple
 */
typedef struct MemoizeTuple
{
	MinimalTuple mintuple;		/* Cached tuple */
	struct MemoizeTuple *next;	/* The next tuple with the same parameter
								 * values or NULL if it's the last one */
} MemoizeTuple;

/*
 * MemoizeKey
 *		The hash table key for cached entries plus the LRU list link
 */
typedef struct MemoizeKey
{
	MinimalTuple params;
	dlist_node	lru_node;		/* Pointer to next/prev key in LRU list */
} MemoizeKey;

/*
 * MemoizeEntry
 *		The data struct that the cache hash table stores
 */
typedef struct MemoizeEntry
{
	MemoizeKey *key;			/* Hash key for hash table lookups */
	MemoizeTuple *tuplehead;	/* Pointer to the first tuple or NULL if no
								 * tuples are cached for this entry */
	uint32		hash;			/* Hash value (cached) */
	char		status;			/* Hash status */
	bool		complete;		/* Did we read the outer plan to completion? */
} MemoizeEntry;

/* define parameters necessary to generate the tuple cache interface */
#define SH_PREFIX memoize
#define SH_ELEMENT_TYPE MemoizeEntry
#define SH_KEY_TYPE MemoizeKey *
#define SH_SCOPE static inline
#define SH_DECLARE
#include "lib/simplehash.h"

static uint32 MemoizeHash_hash(struct memoize_hash *tb,
							   const MemoizeKey *key);
static bool MemoizeHash_equal(struct memoize_hash *tb,
							  const MemoizeKey *key1,
							  const MemoizeKey *key2);

#define SH_PREFIX memoize
#define SH_ELEMENT_TYPE MemoizeEntry
#define SH_KEY_TYPE MemoizeKey *
#define SH_KEY key
#define SH_HASH_KEY(tb, key) MemoizeHash_hash(tb, key)
#define SH_EQUAL(tb, a, b) MemoizeHash_equal(tb, a, b)
#define SH_SCOPE static inline
#define SH_STORE_HASH
#define SH_GET_HASH(tb, a) a->hash
#define SH_DEFINE
#include "lib/simplehash.h"
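
/*
 * Note: the first simplehash.h include above (with SH_DECLARE) emits only
 * the declarations for the "memoize" hash table type and functions; the
 * second (with SH_DEFINE) emits the definitions.  Splitting it this way
 * lets the hash and equality callbacks be declared in between.
 */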

/*
 * MemoizeHash_hash
 *		Hash function for simplehash hashtable.  'key' is unused here as we
 *		require that all table lookups first populate the MemoizeState's
 *		probeslot with the key values to be looked up.
 */
static uint32
MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key)
{
	MemoizeState *mstate = (MemoizeState *) tb->private_data;
	ExprContext *econtext = mstate->ss.ps.ps_ExprContext;
	MemoryContext oldcontext;
	TupleTableSlot *pslot = mstate->probeslot;
	uint32		hashkey = 0;
	int			numkeys = mstate->nkeys;

	oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);

	if (mstate->binary_mode)
	{
		for (int i = 0; i < numkeys; i++)
		{
			/* combine successive hashkeys by rotating */
			hashkey = pg_rotate_left32(hashkey, 1);

			if (!pslot->tts_isnull[i])	/* treat nulls as having hash key 0 */
			{
				Form_pg_attribute attr;
				uint32		hkey;

				attr = TupleDescAttr(pslot->tts_tupleDescriptor, i);

				hkey = datum_image_hash(pslot->tts_values[i], attr->attbyval,
										attr->attlen);

				hashkey ^= hkey;
			}
		}
	}
	else
	{
		FmgrInfo   *hashfunctions = mstate->hashfunctions;
		Oid		   *collations = mstate->collations;

		for (int i = 0; i < numkeys; i++)
		{
			/* combine successive hashkeys by rotating */
			hashkey = pg_rotate_left32(hashkey, 1);

			if (!pslot->tts_isnull[i])	/* treat nulls as having hash key 0 */
			{
				uint32		hkey;

				hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
														collations[i],
														pslot->tts_values[i]));
				hashkey ^= hkey;
			}
		}
	}

	MemoryContextSwitchTo(oldcontext);
	return murmurhash32(hashkey);
}

/*
 * MemoizeHash_equal
 *		Equality function for confirming hash value matches during a hash
 *		table lookup.  'key2' is never used.  Instead the MemoizeState's
 *		probeslot is always populated with details of what's being looked up.
 */
static bool
MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1,
				  const MemoizeKey *key2)
{
	MemoizeState *mstate = (MemoizeState *) tb->private_data;
	ExprContext *econtext = mstate->ss.ps.ps_ExprContext;
	TupleTableSlot *tslot = mstate->tableslot;
	TupleTableSlot *pslot = mstate->probeslot;

	/* probeslot should have already been prepared by prepare_probe_slot() */
	ExecStoreMinimalTuple(key1->params, tslot, false);

	if (mstate->binary_mode)
	{
		MemoryContext oldcontext;
		int			numkeys = mstate->nkeys;
		bool		match = true;

		oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);

		slot_getallattrs(tslot);
		slot_getallattrs(pslot);

		for (int i = 0; i < numkeys; i++)
		{
			Form_pg_attribute attr;

			if (tslot->tts_isnull[i] != pslot->tts_isnull[i])
			{
				match = false;
				break;
			}

			/* both NULL? they're equal */
			if (tslot->tts_isnull[i])
				continue;

			/* perform binary comparison on the two datums */
			attr = TupleDescAttr(tslot->tts_tupleDescriptor, i);
			if (!datum_image_eq(tslot->tts_values[i], pslot->tts_values[i],
								attr->attbyval, attr->attlen))
			{
				match = false;
				break;
			}
		}

		MemoryContextSwitchTo(oldcontext);

		return match;
	}
	else
	{
		econtext->ecxt_innertuple = tslot;
		econtext->ecxt_outertuple = pslot;
		return ExecQual(mstate->cache_eq_expr, econtext);
	}
}

/*
 * build_hash_table
 *		Initialize the hash table to empty.  The MemoizeState's hashtable field
 *		must point to NULL.
 */
static void
build_hash_table(MemoizeState *mstate, uint32 size)
{
	Assert(mstate->hashtable == NULL);

	/* Make a guess at a good size when we're not given a valid size. */
	if (size == 0)
		size = 1024;

	/* memoize_create will convert the size to a power of 2 */
	mstate->hashtable = memoize_create(mstate->tableContext, size, mstate);
}

/*
 * prepare_probe_slot
 *		Populate mstate's probeslot with the values from the tuple stored
 *		in 'key'.  If 'key' is NULL, then perform the population by evaluating
 *		mstate's param_exprs.
 */
static inline void
prepare_probe_slot(MemoizeState *mstate, MemoizeKey *key)
{
	TupleTableSlot *pslot = mstate->probeslot;
	TupleTableSlot *tslot = mstate->tableslot;
	int			numKeys = mstate->nkeys;

	ExecClearTuple(pslot);

	if (key == NULL)
	{
		ExprContext *econtext = mstate->ss.ps.ps_ExprContext;
		MemoryContext oldcontext;

		oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);

		/* Set the probeslot's values based on the current parameter values */
		for (int i = 0; i < numKeys; i++)
			pslot->tts_values[i] = ExecEvalExpr(mstate->param_exprs[i],
												econtext,
												&pslot->tts_isnull[i]);

		MemoryContextSwitchTo(oldcontext);
	}
	else
	{
		/* Process the key's MinimalTuple and store the values in probeslot */
		ExecStoreMinimalTuple(key->params, tslot, false);
		slot_getallattrs(tslot);
		memcpy(pslot->tts_values, tslot->tts_values, sizeof(Datum) * numKeys);
		memcpy(pslot->tts_isnull, tslot->tts_isnull, sizeof(bool) * numKeys);
	}

	ExecStoreVirtualTuple(pslot);
}

/*
 * entry_purge_tuples
 *		Remove all tuples from the cache entry pointed to by 'entry'.  This
 *		leaves an empty cache entry.  Also, update the memory accounting to
 *		reflect the removal of the tuples.
 */
static inline void
entry_purge_tuples(MemoizeState *mstate, MemoizeEntry *entry)
{
	MemoizeTuple *tuple = entry->tuplehead;
	uint64		freed_mem = 0;

	while (tuple != NULL)
	{
		MemoizeTuple *next = tuple->next;

		freed_mem += CACHE_TUPLE_BYTES(tuple);

		/* Free memory used for this tuple */
		pfree(tuple->mintuple);
		pfree(tuple);

		tuple = next;
	}

	entry->complete = false;
	entry->tuplehead = NULL;

	/* Update the memory accounting */
	mstate->mem_used -= freed_mem;
}

/*
 * remove_cache_entry
 *		Remove 'entry' from the cache and free memory used by it.
 */
static void
remove_cache_entry(MemoizeState *mstate, MemoizeEntry *entry)
{
	MemoizeKey *key = entry->key;

	dlist_delete(&entry->key->lru_node);

	/* Remove all of the tuples from this entry */
	entry_purge_tuples(mstate, entry);

	/*
	 * Update memory accounting.  entry_purge_tuples should have already
	 * subtracted the memory used for each cached tuple.  Here we just update
	 * the amount used by the entry itself.
	 */
	mstate->mem_used -= EMPTY_ENTRY_MEMORY_BYTES(entry);

	/* Remove the entry from the cache */
	memoize_delete_item(mstate->hashtable, entry);

	pfree(key->params);
	pfree(key);
}

/*
 * cache_purge_all
 *		Remove all items from the cache
 */
static void
cache_purge_all(MemoizeState *mstate)
{
	uint64		evictions = 0;

	if (mstate->hashtable != NULL)
		evictions = mstate->hashtable->members;

	/*
	 * Likely the most efficient way to remove all items is to just reset the
	 * memory context for the cache and then rebuild a fresh hash table.  This
	 * saves having to remove each item one by one and pfree each cached tuple
	 */
	MemoryContextReset(mstate->tableContext);

	/* NULLify so we recreate the table on the next call */
	mstate->hashtable = NULL;

	/* reset the LRU list */
	dlist_init(&mstate->lru_list);
	mstate->last_tuple = NULL;
	mstate->entry = NULL;

	mstate->mem_used = 0;

	/* XXX should we add something new to track these purges? */
	mstate->stats.cache_evictions += evictions; /* Update Stats */
}

/*
 * cache_reduce_memory
 *		Evict older and less recently used items from the cache in order to
 *		reduce the memory consumption back to something below the
 *		MemoizeState's mem_limit.
 *
 * 'specialkey', if not NULL, causes the function to return false if the entry
 * which the key belongs to is removed from the cache.
 */
static bool
cache_reduce_memory(MemoizeState *mstate, MemoizeKey *specialkey)
{
	bool		specialkey_intact = true;	/* for now */
	dlist_mutable_iter iter;
	uint64		evictions = 0;

	/* Update peak memory usage */
	if (mstate->mem_used > mstate->stats.mem_peak)
		mstate->stats.mem_peak = mstate->mem_used;

	/* We expect only to be called when we've gone over budget on memory */
	Assert(mstate->mem_used > mstate->mem_limit);

	/* Start the eviction process starting at the head of the LRU list. */
	dlist_foreach_modify(iter, &mstate->lru_list)
	{
		MemoizeKey *key = dlist_container(MemoizeKey, lru_node, iter.cur);
		MemoizeEntry *entry;

		/*
		 * Populate the hash probe slot in preparation for looking up this LRU
		 * entry.
		 */
		prepare_probe_slot(mstate, key);

		/*
		 * Ideally the LRU list pointers would be stored in the entry itself
		 * rather than in the key.  Unfortunately, we can't do that as the
		 * simplehash.h code may resize the table and allocate new memory for
		 * entries which would result in those pointers pointing to the old
		 * buckets.  However, it's fine to use the key to store this as that's
		 * only referenced by a pointer in the entry, which of course follows
		 * the entry whenever the hash table is resized.  Since we only have a
		 * pointer to the key here, we must perform a hash table lookup to
		 * find the entry that the key belongs to.
		 */
		entry = memoize_lookup(mstate->hashtable, NULL);

		/*
		 * Sanity check that we found the entry belonging to the LRU list
		 * item.  A misbehaving hash or equality function could cause the
		 * entry not to be found or the wrong entry to be found.
		 */
		if (unlikely(entry == NULL || entry->key != key))
			elog(ERROR, "could not find memoization table entry");

		/*
		 * If we're being called to free memory while the cache is being
		 * populated with new tuples, then we'd better take some care as we
		 * could end up freeing the entry which 'specialkey' belongs to.
		 * Generally callers will pass 'specialkey' as the key for the cache
		 * entry which is currently being populated, so we must set
		 * 'specialkey_intact' to false to inform the caller the specialkey
		 * entry has been removed.
		 */
		if (key == specialkey)
			specialkey_intact = false;

		/*
		 * Finally remove the entry.  This will remove from the LRU list too.
		 */
		remove_cache_entry(mstate, entry);

		evictions++;

		/* Exit if we've freed enough memory */
		if (mstate->mem_used <= mstate->mem_limit)
			break;
	}

	mstate->stats.cache_evictions += evictions; /* Update Stats */

	return specialkey_intact;
}

/*
 * cache_lookup
 *		Perform a lookup to see if we've already cached tuples based on the
 *		scan's current parameters.  If we find an existing entry we move it to
 *		the end of the LRU list, set *found to true then return it.  If we
 *		don't find an entry then we create a new one and add it to the end of
 *		the LRU list.  We also update cache memory accounting and remove older
 *		entries if we go over the memory budget.  If we managed to free enough
 *		memory we return the new entry, else we return NULL.
 *
 * Callers can assume we'll never return NULL when *found is true.
 */
static MemoizeEntry *
cache_lookup(MemoizeState *mstate, bool *found)
{
	MemoizeKey *key;
	MemoizeEntry *entry;
	MemoryContext oldcontext;

	/* prepare the probe slot with the current scan parameters */
	prepare_probe_slot(mstate, NULL);

	/*
	 * Add the new entry to the cache.  No need to pass a valid key since the
	 * hash function uses mstate's probeslot, which we populated above.
	 */
	entry = memoize_insert(mstate->hashtable, NULL, found);

	if (*found)
	{
		/*
		 * Move existing entry to the tail of the LRU list to mark it as the
		 * most recently used item.
		 */
		dlist_move_tail(&mstate->lru_list, &entry->key->lru_node);

		return entry;
	}

	oldcontext = MemoryContextSwitchTo(mstate->tableContext);

	/* Allocate a new key */
	entry->key = key = (MemoizeKey *) palloc(sizeof(MemoizeKey));
	key->params = ExecCopySlotMinimalTuple(mstate->probeslot);

	/* Update the total cache memory utilization */
	mstate->mem_used += EMPTY_ENTRY_MEMORY_BYTES(entry);

	/* Initialize this entry */
	entry->complete = false;
	entry->tuplehead = NULL;

	/*
	 * Since this is the most recently used entry, push this entry onto the
	 * end of the LRU list.
	 */
	dlist_push_tail(&mstate->lru_list, &entry->key->lru_node);

	mstate->last_tuple = NULL;

	MemoryContextSwitchTo(oldcontext);

	/*
	 * If we've gone over our memory budget, then we'll free up some space in
	 * the cache.
	 */
	if (mstate->mem_used > mstate->mem_limit)
	{
		/*
		 * Try to free up some memory.  It's highly unlikely that we'll fail
		 * to do so here since the entry we've just added is yet to contain
		 * any tuples and we're able to remove any other entry to reduce the
		 * memory consumption.
		 */
		if (unlikely(!cache_reduce_memory(mstate, key)))
			return NULL;

		/*
		 * The process of removing entries from the cache may have caused the
		 * code in simplehash.h to shuffle elements to earlier buckets in the
		 * hash table.  If it has, we'll need to find the entry again by
		 * performing a lookup.  Fortunately, we can detect if this has
		 * happened by seeing if the entry is still in use and that the key
		 * pointer matches our expected key.
		 */
		if (entry->status != memoize_SH_IN_USE || entry->key != key)
		{
			/*
			 * We need to repopulate the probeslot as lookups performed during
			 * the cache evictions above will have stored some other key.
			 */
			prepare_probe_slot(mstate, key);

			/* Re-find the newly added entry */
			entry = memoize_lookup(mstate->hashtable, NULL);
			Assert(entry != NULL);
		}
	}

	return entry;
}

/*
 * cache_store_tuple
 *		Add the tuple stored in 'slot' to the mstate's current cache entry.
 *		The cache entry must have already been made with cache_lookup().
 *		mstate's last_tuple field must point to the tail of mstate->entry's
 *		list of tuples.
 */
static bool
cache_store_tuple(MemoizeState *mstate, TupleTableSlot *slot)
{
	MemoizeTuple *tuple;
	MemoizeEntry *entry = mstate->entry;
	MemoryContext oldcontext;

	Assert(slot != NULL);
	Assert(entry != NULL);

	oldcontext = MemoryContextSwitchTo(mstate->tableContext);

	tuple = (MemoizeTuple *) palloc(sizeof(MemoizeTuple));
	tuple->mintuple = ExecCopySlotMinimalTuple(slot);
	tuple->next = NULL;

	/* Account for the memory we just consumed */
	mstate->mem_used += CACHE_TUPLE_BYTES(tuple);

	if (entry->tuplehead == NULL)
	{
		/*
		 * This is the first tuple for this entry, so just point the list head
		 * to it.
		 */
		entry->tuplehead = tuple;
	}
	else
	{
		/* push this tuple onto the tail of the list */
		mstate->last_tuple->next = tuple;
	}

	mstate->last_tuple = tuple;
	MemoryContextSwitchTo(oldcontext);

	/*
	 * If we've gone over our memory budget then free up some space in the
	 * cache.
	 */
	if (mstate->mem_used > mstate->mem_limit)
	{
		MemoizeKey *key = entry->key;

		if (!cache_reduce_memory(mstate, key))
			return false;

		/*
		 * The process of removing entries from the cache may have caused the
		 * code in simplehash.h to shuffle elements to earlier buckets in the
		 * hash table.  If it has, we'll need to find the entry again by
		 * performing a lookup.  Fortunately, we can detect if this has
		 * happened by seeing if the entry is still in use and that the key
		 * pointer matches our expected key.
		 */
		if (entry->status != memoize_SH_IN_USE || entry->key != key)
		{
			/*
			 * We need to repopulate the probeslot as lookups performed during
			 * the cache evictions above will have stored some other key.
			 */
			prepare_probe_slot(mstate, key);

			/* Re-find the entry */
			mstate->entry = entry = memoize_lookup(mstate->hashtable, NULL);
			Assert(entry != NULL);
		}
	}

	return true;
}

static TupleTableSlot *
ExecMemoize(PlanState *pstate)
{
	MemoizeState *node = castNode(MemoizeState, pstate);
	ExprContext *econtext = node->ss.ps.ps_ExprContext;
	PlanState  *outerNode;
	TupleTableSlot *slot;

	CHECK_FOR_INTERRUPTS();

	/*
	 * Reset per-tuple memory context to free any expression evaluation
	 * storage allocated in the previous tuple cycle.
	 */
	ResetExprContext(econtext);

	switch (node->mstatus)
	{
		case MEMO_CACHE_LOOKUP:
			{
				MemoizeEntry *entry;
				TupleTableSlot *outerslot;
				bool		found;

				Assert(node->entry == NULL);

				/* first call? we'll need a hash table. */
				if (unlikely(node->hashtable == NULL))
					build_hash_table(node, ((Memoize *) pstate->plan)->est_entries);

				/*
				 * We're only ever in this state for the first call of the
				 * scan.  Here we have a look to see if we've already seen the
				 * current parameters before and if we have already cached a
				 * complete set of records that the outer plan will return for
				 * these parameters.
				 *
				 * When we find a valid cache entry, we'll return the first
				 * tuple from it.  If not found, we'll create a cache entry and
				 * then try to fetch a tuple from the outer scan.  If we find
				 * one there, we'll try to cache it.
				 */

				/* see if we've got anything cached for the current parameters */
				entry = cache_lookup(node, &found);

				if (found && entry->complete)
				{
					node->stats.cache_hits += 1;	/* stats update */

					/*
					 * Set last_tuple and entry so that the state
					 * MEMO_CACHE_FETCH_NEXT_TUPLE can easily find the next
					 * tuple for these parameters.
					 */
					node->last_tuple = entry->tuplehead;
					node->entry = entry;

					/* Fetch the first cached tuple, if there is one */
					if (entry->tuplehead)
					{
						node->mstatus = MEMO_CACHE_FETCH_NEXT_TUPLE;

						slot = node->ss.ps.ps_ResultTupleSlot;
						ExecStoreMinimalTuple(entry->tuplehead->mintuple,
											  slot, false);

						return slot;
					}

					/* The cache entry is void of any tuples. */
					node->mstatus = MEMO_END_OF_SCAN;
					return NULL;
				}

				/* Handle cache miss */
				node->stats.cache_misses += 1;	/* stats update */

				if (found)
				{
					/*
					 * A cache entry was found, but the scan for that entry
					 * did not run to completion.  We'll just remove all
					 * tuples and start again.  It might be tempting to
					 * continue where we left off, but there's no guarantee
					 * the outer node will produce the tuples in the same
					 * order as it did last time.
					 */
					entry_purge_tuples(node, entry);
				}

				/* Scan the outer node for a tuple to cache */
				outerNode = outerPlanState(node);
				outerslot = ExecProcNode(outerNode);
				if (TupIsNull(outerslot))
				{
					/*
					 * cache_lookup may have returned NULL due to failure to
					 * free enough cache space, so ensure we don't do anything
					 * here that assumes it worked.  There's no need to go into
					 * bypass mode here as we're setting mstatus to end of
					 * scan.
					 */
					if (likely(entry))
						entry->complete = true;

					node->mstatus = MEMO_END_OF_SCAN;
					return NULL;
				}

				node->entry = entry;

				/*
				 * If we failed to create the entry or failed to store the
				 * tuple in the entry, then go into bypass mode.
				 */
				if (unlikely(entry == NULL ||
							 !cache_store_tuple(node, outerslot)))
				{
					node->stats.cache_overflows += 1;	/* stats update */

					node->mstatus = MEMO_CACHE_BYPASS_MODE;

					/*
					 * No need to clear out last_tuple as we'll stay in bypass
					 * mode until the end of the scan.
					 */
				}
				else
				{
					/*
					 * If we only expect a single row from this scan then we
					 * can mark that we're not expecting more.  This allows
					 * cache lookups to work even when the scan has not been
					 * executed to completion.
					 */
					entry->complete = node->singlerow;
					node->mstatus = MEMO_FILLING_CACHE;
				}

				slot = node->ss.ps.ps_ResultTupleSlot;
				ExecCopySlot(slot, outerslot);
				return slot;
			}

		case MEMO_CACHE_FETCH_NEXT_TUPLE:
			{
				/* We shouldn't be in this state if these are not set */
				Assert(node->entry != NULL);
				Assert(node->last_tuple != NULL);

				/* Skip to the next tuple to output */
				node->last_tuple = node->last_tuple->next;

				/* No more tuples in the cache */
				if (node->last_tuple == NULL)
				{
					node->mstatus = MEMO_END_OF_SCAN;
					return NULL;
				}

				slot = node->ss.ps.ps_ResultTupleSlot;
				ExecStoreMinimalTuple(node->last_tuple->mintuple, slot,
									  false);

				return slot;
			}

		case MEMO_FILLING_CACHE:
			{
				TupleTableSlot *outerslot;
				MemoizeEntry *entry = node->entry;

				/* entry should already have been set by MEMO_CACHE_LOOKUP */
				Assert(entry != NULL);

				/*
				 * When in the MEMO_FILLING_CACHE state, we've just had a
				 * cache miss and are populating the cache with the current
				 * scan tuples.
				 */
				outerNode = outerPlanState(node);
				outerslot = ExecProcNode(outerNode);
				if (TupIsNull(outerslot))
				{
					/* No more tuples.  Mark it as complete */
					entry->complete = true;
					node->mstatus = MEMO_END_OF_SCAN;
					return NULL;
				}

				/*
				 * Validate if the planner properly set the singlerow flag.  It
				 * should only set that if each cache entry can, at most,
				 * return 1 row.
				 */
				if (unlikely(entry->complete))
					elog(ERROR, "cache entry already complete");

				/* Record the tuple in the current cache entry */
				if (unlikely(!cache_store_tuple(node, outerslot)))
				{
					/* Couldn't store it?  Handle overflow */
					node->stats.cache_overflows += 1;	/* stats update */

					node->mstatus = MEMO_CACHE_BYPASS_MODE;

					/*
					 * No need to clear out entry or last_tuple as we'll stay
					 * in bypass mode until the end of the scan.
					 */
				}

				slot = node->ss.ps.ps_ResultTupleSlot;
				ExecCopySlot(slot, outerslot);
				return slot;
			}

		case MEMO_CACHE_BYPASS_MODE:
			{
				TupleTableSlot *outerslot;

				/*
				 * When in bypass mode we just continue to read tuples without
				 * caching.  We need to wait until the next rescan before we
				 * can come out of this mode.
				 */
				outerNode = outerPlanState(node);
				outerslot = ExecProcNode(outerNode);
				if (TupIsNull(outerslot))
				{
					node->mstatus = MEMO_END_OF_SCAN;
					return NULL;
				}

				slot = node->ss.ps.ps_ResultTupleSlot;
				ExecCopySlot(slot, outerslot);
				return slot;
			}

		case MEMO_END_OF_SCAN:

			/*
			 * We've already returned NULL for this scan, but just in case
			 * something calls us again by mistake.
			 */
			return NULL;

		default:
			elog(ERROR, "unrecognized memoize state: %d",
				 (int) node->mstatus);
			return NULL;
	}							/* switch */
}

MemoizeState *
ExecInitMemoize(Memoize *node, EState *estate, int eflags)
{
	MemoizeState *mstate = makeNode(MemoizeState);
	Plan	   *outerNode;
	int			i;
	int			nkeys;
	Oid		   *eqfuncoids;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	mstate->ss.ps.plan = (Plan *) node;
	mstate->ss.ps.state = estate;
	mstate->ss.ps.ExecProcNode = ExecMemoize;

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &mstate->ss.ps);

	outerNode = outerPlan(node);
	outerPlanState(mstate) = ExecInitNode(outerNode, estate, eflags);

	/*
	 * Initialize return slot and type.  No need to initialize projection info
	 * because this node doesn't do projections.
	 */
	ExecInitResultTupleSlotTL(&mstate->ss.ps, &TTSOpsMinimalTuple);
	mstate->ss.ps.ps_ProjInfo = NULL;

	/*
	 * Initialize scan slot and type.
	 */
	ExecCreateScanSlotFromOuterPlan(estate, &mstate->ss, &TTSOpsMinimalTuple);

	/*
	 * Set the state machine to lookup the cache.  We won't find anything
	 * until we cache something, but this saves a special case to create the
	 * first entry.
	 */
	mstate->mstatus = MEMO_CACHE_LOOKUP;

	mstate->nkeys = nkeys = node->numKeys;
	mstate->hashkeydesc = ExecTypeFromExprList(node->param_exprs);
	mstate->tableslot = MakeSingleTupleTableSlot(mstate->hashkeydesc,
												 &TTSOpsMinimalTuple);
	mstate->probeslot = MakeSingleTupleTableSlot(mstate->hashkeydesc,
												 &TTSOpsVirtual);

	mstate->param_exprs = (ExprState **) palloc(nkeys * sizeof(ExprState *));
	mstate->collations = node->collations; /* Just point directly to the plan
											* data */
	mstate->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));

	eqfuncoids = palloc(nkeys * sizeof(Oid));

	for (i = 0; i < nkeys; i++)
	{
		Oid			hashop = node->hashOperators[i];
		Oid			left_hashfn;
		Oid			right_hashfn;
		Expr	   *param_expr = (Expr *) list_nth(node->param_exprs, i);

		if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
			elog(ERROR, "could not find hash function for hash operator %u",
				 hashop);

		fmgr_info(left_hashfn, &mstate->hashfunctions[i]);

		mstate->param_exprs[i] = ExecInitExpr(param_expr, (PlanState *) mstate);
		eqfuncoids[i] = get_opcode(hashop);
	}

	mstate->cache_eq_expr = ExecBuildParamSetEqual(mstate->hashkeydesc,
												   &TTSOpsMinimalTuple,
												   &TTSOpsVirtual,
												   eqfuncoids,
												   node->collations,
												   node->param_exprs,
												   (PlanState *) mstate);

	pfree(eqfuncoids);
	mstate->mem_used = 0;

	/* Limit the total memory consumed by the cache to this */
	mstate->mem_limit = get_hash_memory_limit();

	/* A memory context dedicated for the cache */
	mstate->tableContext = AllocSetContextCreate(CurrentMemoryContext,
												 "MemoizeHashTable",
												 ALLOCSET_DEFAULT_SIZES);

	dlist_init(&mstate->lru_list);
	mstate->last_tuple = NULL;
	mstate->entry = NULL;

	/*
	 * Mark if we can assume the cache entry is completed after we get the
	 * first record for it.  Some callers might not call us again after
	 * getting the first match.  e.g. A join operator performing a unique join
	 * is able to skip to the next outer tuple after getting the first
	 * matching inner tuple.  In this case, the cache entry is complete after
	 * getting the first tuple.  This allows us to mark it as so.
	 */
	mstate->singlerow = node->singlerow;
	mstate->keyparamids = node->keyparamids;

	/*
	 * Record if the cache keys should be compared bit by bit, or logically
	 * using the type's hash equality operator
	 */
	mstate->binary_mode = node->binary_mode;

	/* Zero the statistics counters */
	memset(&mstate->stats, 0, sizeof(MemoizeInstrumentation));

	/*
	 * Because it may require a large allocation, we delay building of the
	 * hash table until executor run.
	 */
	mstate->hashtable = NULL;

	return mstate;
}

void
ExecEndMemoize(MemoizeState *node)
{
#ifdef USE_ASSERT_CHECKING
	/* Validate the memory accounting code is correct in assert builds. */
	if (node->hashtable != NULL)
	{
		int			count;
		uint64		mem = 0;
		memoize_iterator i;
		MemoizeEntry *entry;

		memoize_start_iterate(node->hashtable, &i);

		count = 0;
		while ((entry = memoize_iterate(node->hashtable, &i)) != NULL)
		{
			MemoizeTuple *tuple = entry->tuplehead;

			mem += EMPTY_ENTRY_MEMORY_BYTES(entry);
			while (tuple != NULL)
			{
				mem += CACHE_TUPLE_BYTES(tuple);
				tuple = tuple->next;
			}
			count++;
		}

		Assert(count == node->hashtable->members);
		Assert(mem == node->mem_used);
	}
#endif

	/*
	 * When ending a parallel worker, copy the statistics gathered by the
	 * worker back into shared memory so that it can be picked up by the main
	 * process to report in EXPLAIN ANALYZE.
	 */
	if (node->shared_info != NULL && IsParallelWorker())
	{
		MemoizeInstrumentation *si;

		/* Make mem_peak available for EXPLAIN */
		if (node->stats.mem_peak == 0)
			node->stats.mem_peak = node->mem_used;

		Assert(ParallelWorkerNumber <= node->shared_info->num_workers);
		si = &node->shared_info->sinstrument[ParallelWorkerNumber];
		memcpy(si, &node->stats, sizeof(MemoizeInstrumentation));
	}

	/* Remove the cache context */
	MemoryContextDelete(node->tableContext);

	/*
	 * shut down the subplan
	 */
	ExecEndNode(outerPlanState(node));
}

void
ExecReScanMemoize(MemoizeState *node)
{
	PlanState  *outerPlan = outerPlanState(node);

	/* Mark that we must lookup the cache for a new set of parameters */
	node->mstatus = MEMO_CACHE_LOOKUP;

	/* nullify pointers used for the last scan */
	node->entry = NULL;
	node->last_tuple = NULL;

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (outerPlan->chgParam == NULL)
		ExecReScan(outerPlan);

	/*
	 * Purge the entire cache if a parameter changed that is not part of the
	 * cache key.
	 */
	if (bms_nonempty_difference(outerPlan->chgParam, node->keyparamids))
		cache_purge_all(node);
}

/*
 * ExecEstimateCacheEntryOverheadBytes
 *		For use in the query planner to help it estimate the amount of memory
 *		required to store a single entry in the cache.
 */
double
ExecEstimateCacheEntryOverheadBytes(double ntuples)
{
	return sizeof(MemoizeEntry) + sizeof(MemoizeKey) + sizeof(MemoizeTuple) *
		ntuples;
}

/* ----------------------------------------------------------------
 *						Parallel Query Support
 * ----------------------------------------------------------------
 */

/* ----------------------------------------------------------------
 *		ExecMemoizeEstimate
 *
 *		Estimate space required to propagate memoize statistics.
 * ----------------------------------------------------------------
 */
void
ExecMemoizeEstimate(MemoizeState *node, ParallelContext *pcxt)
{
	Size		size;

	/* don't need this if not instrumenting or no workers */
	if (!node->ss.ps.instrument || pcxt->nworkers == 0)
		return;

	size = mul_size(pcxt->nworkers, sizeof(MemoizeInstrumentation));
	size = add_size(size, offsetof(SharedMemoizeInfo, sinstrument));
	shm_toc_estimate_chunk(&pcxt->estimator, size);
	shm_toc_estimate_keys(&pcxt->estimator, 1);
}

/* ----------------------------------------------------------------
 *		ExecMemoizeInitializeDSM
 *
 *		Initialize DSM space for memoize statistics.
 * ----------------------------------------------------------------
 */
void
ExecMemoizeInitializeDSM(MemoizeState *node, ParallelContext *pcxt)
{
	Size		size;

	/* don't need this if not instrumenting or no workers */
	if (!node->ss.ps.instrument || pcxt->nworkers == 0)
		return;

	size = offsetof(SharedMemoizeInfo, sinstrument)
		+ pcxt->nworkers * sizeof(MemoizeInstrumentation);
	node->shared_info = shm_toc_allocate(pcxt->toc, size);
	/* ensure any unfilled slots will contain zeroes */
	memset(node->shared_info, 0, size);
	node->shared_info->num_workers = pcxt->nworkers;
	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
				   node->shared_info);
}

/* ----------------------------------------------------------------
 *		ExecMemoizeInitializeWorker
 *
 *		Attach worker to DSM space for memoize statistics.
 * ----------------------------------------------------------------
 */
void
ExecMemoizeInitializeWorker(MemoizeState *node, ParallelWorkerContext *pwcxt)
{
	node->shared_info =
		shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true);
}

/* ----------------------------------------------------------------
 *		ExecMemoizeRetrieveInstrumentation
 *
 *		Transfer memoize statistics from DSM to private memory.
 * ----------------------------------------------------------------
 */
void
ExecMemoizeRetrieveInstrumentation(MemoizeState *node)
{
	Size		size;
	SharedMemoizeInfo *si;

	if (node->shared_info == NULL)
		return;

	size = offsetof(SharedMemoizeInfo, sinstrument)
		+ node->shared_info->num_workers * sizeof(MemoizeInstrumentation);
	si = palloc(size);
	memcpy(si, node->shared_info, size);
	node->shared_info = si;
}