nbtree: fix read page recheck typo.
[pgsql.git] / src / backend / utils / activity / pgstat_shmem.c
bloba09c6fee055e55ba73945b9e4474af60ce8a491d
/* -------------------------------------------------------------------------
 *
 * pgstat_shmem.c
 *	  Storage of stats entries in shared memory
 *
 * Copyright (c) 2001-2024, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/utils/activity/pgstat_shmem.c
 * -------------------------------------------------------------------------
 */
13 #include "postgres.h"
15 #include "pgstat.h"
16 #include "storage/shmem.h"
17 #include "utils/memutils.h"
18 #include "utils/pgstat_internal.h"
21 #define PGSTAT_ENTRY_REF_HASH_SIZE 128
23 /* hash table entry for finding the PgStat_EntryRef for a key */
24 typedef struct PgStat_EntryRefHashEntry
26 PgStat_HashKey key; /* hash key */
27 char status; /* for simplehash use */
28 PgStat_EntryRef *entry_ref;
29 } PgStat_EntryRefHashEntry;
32 /* for references to shared statistics entries */
33 #define SH_PREFIX pgstat_entry_ref_hash
34 #define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
35 #define SH_KEY_TYPE PgStat_HashKey
36 #define SH_KEY key
37 #define SH_HASH_KEY(tb, key) \
38 pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
39 #define SH_EQUAL(tb, a, b) \
40 pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
41 #define SH_SCOPE static inline
42 #define SH_DEFINE
43 #define SH_DECLARE
44 #include "lib/simplehash.h"
47 static void pgstat_drop_database_and_contents(Oid dboid);
49 static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
51 static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
52 static bool pgstat_need_entry_refs_gc(void);
53 static void pgstat_gc_entry_refs(void);
54 static void pgstat_release_all_entry_refs(bool discard_pending);
55 typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
56 static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
58 static void pgstat_setup_memcxt(void);
61 /* parameter for the shared hash */
62 static const dshash_parameters dsh_params = {
63 sizeof(PgStat_HashKey),
64 sizeof(PgStatShared_HashEntry),
65 pgstat_cmp_hash_key,
66 pgstat_hash_hash_key,
67 dshash_memcpy,
68 LWTRANCHE_PGSTATS_HASH
73 * Backend local references to shared stats entries. If there are pending
74 * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
75 * list.
77 * When a stats entry is dropped each backend needs to release its reference
78 * to it before the memory can be released. To trigger that
79 * pgStatLocal.shmem->gc_request_count is incremented - which each backend
80 * compares to their copy of pgStatSharedRefAge on a regular basis.
82 static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
83 static int pgStatSharedRefAge = 0; /* cache age of pgStatLocal.shmem */
86 * Memory contexts containing the pgStatEntryRefHash table and the
87 * pgStatSharedRef entries respectively. Kept separate to make it easier to
88 * track / attribute memory usage.
90 static MemoryContext pgStatSharedRefContext = NULL;
91 static MemoryContext pgStatEntryRefHashContext = NULL;
/* ------------------------------------------------------------
 * Public functions called from postmaster follow
 * ------------------------------------------------------------
 */
100 * The size of the shared memory allocation for stats stored in the shared
101 * stats hash table. This allocation will be done as part of the main shared
102 * memory, rather than dynamic shared memory, allowing it to be initialized in
103 * postmaster.
105 static Size
106 pgstat_dsa_init_size(void)
108 Size sz;
111 * The dshash header / initial buckets array needs to fit into "plain"
112 * shared memory, but it's beneficial to not need dsm segments
113 * immediately. A size of 256kB seems works well and is not
114 * disproportional compared to other constant sized shared memory
115 * allocations. NB: To avoid DSMs further, the user can configure
116 * min_dynamic_shared_memory.
118 sz = 256 * 1024;
119 Assert(dsa_minimum_size() <= sz);
120 return MAXALIGN(sz);
124 * Compute shared memory space needed for cumulative statistics
126 Size
127 StatsShmemSize(void)
129 Size sz;
131 sz = MAXALIGN(sizeof(PgStat_ShmemControl));
132 sz = add_size(sz, pgstat_dsa_init_size());
134 /* Add shared memory for all the custom fixed-numbered statistics */
135 for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
137 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
139 if (!kind_info)
140 continue;
141 if (!kind_info->fixed_amount)
142 continue;
144 Assert(kind_info->shared_size != 0);
146 sz += MAXALIGN(kind_info->shared_size);
149 return sz;
153 * Initialize cumulative statistics system during startup
155 void
156 StatsShmemInit(void)
158 bool found;
159 Size sz;
161 sz = StatsShmemSize();
162 pgStatLocal.shmem = (PgStat_ShmemControl *)
163 ShmemInitStruct("Shared Memory Stats", sz, &found);
165 if (!IsUnderPostmaster)
167 dsa_area *dsa;
168 dshash_table *dsh;
169 PgStat_ShmemControl *ctl = pgStatLocal.shmem;
170 char *p = (char *) ctl;
172 Assert(!found);
174 /* the allocation of pgStatLocal.shmem itself */
175 p += MAXALIGN(sizeof(PgStat_ShmemControl));
178 * Create a small dsa allocation in plain shared memory. This is
179 * required because postmaster cannot use dsm segments. It also
180 * provides a small efficiency win.
182 ctl->raw_dsa_area = p;
183 p += MAXALIGN(pgstat_dsa_init_size());
184 dsa = dsa_create_in_place(ctl->raw_dsa_area,
185 pgstat_dsa_init_size(),
186 LWTRANCHE_PGSTATS_DSA, 0);
187 dsa_pin(dsa);
190 * To ensure dshash is created in "plain" shared memory, temporarily
191 * limit size of dsa to the initial size of the dsa.
193 dsa_set_size_limit(dsa, pgstat_dsa_init_size());
196 * With the limit in place, create the dshash table. XXX: It'd be nice
197 * if there were dshash_create_in_place().
199 dsh = dshash_create(dsa, &dsh_params, NULL);
200 ctl->hash_handle = dshash_get_hash_table_handle(dsh);
202 /* lift limit set above */
203 dsa_set_size_limit(dsa, -1);
206 * Postmaster will never access these again, thus free the local
207 * dsa/dshash references.
209 dshash_detach(dsh);
210 dsa_detach(dsa);
212 pg_atomic_init_u64(&ctl->gc_request_count, 1);
214 /* initialize fixed-numbered stats */
215 for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
217 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
218 char *ptr;
220 if (!kind_info || !kind_info->fixed_amount)
221 continue;
223 if (pgstat_is_kind_builtin(kind))
224 ptr = ((char *) ctl) + kind_info->shared_ctl_off;
225 else
227 int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
229 Assert(kind_info->shared_size != 0);
230 ctl->custom_data[idx] = ShmemAlloc(kind_info->shared_size);
231 ptr = ctl->custom_data[idx];
234 kind_info->init_shmem_cb(ptr);
237 else
239 Assert(found);
243 void
244 pgstat_attach_shmem(void)
246 MemoryContext oldcontext;
248 Assert(pgStatLocal.dsa == NULL);
250 /* stats shared memory persists for the backend lifetime */
251 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
253 pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
254 NULL);
255 dsa_pin_mapping(pgStatLocal.dsa);
257 pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
258 pgStatLocal.shmem->hash_handle, 0);
260 MemoryContextSwitchTo(oldcontext);
263 void
264 pgstat_detach_shmem(void)
266 Assert(pgStatLocal.dsa);
268 /* we shouldn't leave references to shared stats */
269 pgstat_release_all_entry_refs(false);
271 dshash_detach(pgStatLocal.shared_hash);
272 pgStatLocal.shared_hash = NULL;
274 dsa_detach(pgStatLocal.dsa);
277 * dsa_detach() does not decrement the DSA reference count as no segment
278 * was provided to dsa_attach_in_place(), causing no cleanup callbacks to
279 * be registered. Hence, release it manually now.
281 dsa_release_in_place(pgStatLocal.shmem->raw_dsa_area);
283 pgStatLocal.dsa = NULL;
/* ------------------------------------------------------------
 * Maintenance of shared memory stats entries
 * ------------------------------------------------------------
 */
292 PgStatShared_Common *
293 pgstat_init_entry(PgStat_Kind kind,
294 PgStatShared_HashEntry *shhashent)
296 /* Create new stats entry. */
297 dsa_pointer chunk;
298 PgStatShared_Common *shheader;
301 * Initialize refcount to 1, marking it as valid / not dropped. The entry
302 * can't be freed before the initialization because it can't be found as
303 * long as we hold the dshash partition lock. Caller needs to increase
304 * further if a longer lived reference is needed.
306 pg_atomic_init_u32(&shhashent->refcount, 1);
307 shhashent->dropped = false;
309 chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
310 shheader = dsa_get_address(pgStatLocal.dsa, chunk);
311 shheader->magic = 0xdeadbeef;
313 /* Link the new entry from the hash entry. */
314 shhashent->body = chunk;
316 LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
318 return shheader;
321 static PgStatShared_Common *
322 pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
324 PgStatShared_Common *shheader;
326 shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
328 /* mark as not dropped anymore */
329 pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
330 shhashent->dropped = false;
332 /* reinitialize content */
333 Assert(shheader->magic == 0xdeadbeef);
334 memset(pgstat_get_entry_data(kind, shheader), 0,
335 pgstat_get_entry_len(kind));
337 return shheader;
340 static void
341 pgstat_setup_shared_refs(void)
343 if (likely(pgStatEntryRefHash != NULL))
344 return;
346 pgStatEntryRefHash =
347 pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
348 PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
349 pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
350 Assert(pgStatSharedRefAge != 0);
354 * Helper function for pgstat_get_entry_ref().
356 static void
357 pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
358 PgStatShared_HashEntry *shhashent,
359 PgStatShared_Common *shheader)
361 Assert(shheader->magic == 0xdeadbeef);
362 Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
364 pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
366 dshash_release_lock(pgStatLocal.shared_hash, shhashent);
368 entry_ref->shared_stats = shheader;
369 entry_ref->shared_entry = shhashent;
373 * Helper function for pgstat_get_entry_ref().
375 static bool
376 pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
378 bool found;
379 PgStat_EntryRefHashEntry *cache_entry;
382 * We immediately insert a cache entry, because it avoids 1) multiple
383 * hashtable lookups in case of a cache miss 2) having to deal with
384 * out-of-memory errors after incrementing PgStatShared_Common->refcount.
387 cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
389 if (!found || !cache_entry->entry_ref)
391 PgStat_EntryRef *entry_ref;
393 cache_entry->entry_ref = entry_ref =
394 MemoryContextAlloc(pgStatSharedRefContext,
395 sizeof(PgStat_EntryRef));
396 entry_ref->shared_stats = NULL;
397 entry_ref->shared_entry = NULL;
398 entry_ref->pending = NULL;
400 found = false;
402 else if (cache_entry->entry_ref->shared_stats == NULL)
404 Assert(cache_entry->entry_ref->pending == NULL);
405 found = false;
407 else
409 PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
411 entry_ref = cache_entry->entry_ref;
412 Assert(entry_ref->shared_entry != NULL);
413 Assert(entry_ref->shared_stats != NULL);
415 Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
416 /* should have at least our reference */
417 Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
420 *entry_ref_p = cache_entry->entry_ref;
421 return found;
425 * Get a shared stats reference. If create is true, the shared stats object is
426 * created if it does not exist.
428 * When create is true, and created_entry is non-NULL, it'll be set to true
429 * if the entry is newly created, false otherwise.
431 PgStat_EntryRef *
432 pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
433 bool *created_entry)
435 PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objid = objid};
436 PgStatShared_HashEntry *shhashent;
437 PgStatShared_Common *shheader = NULL;
438 PgStat_EntryRef *entry_ref;
441 * passing in created_entry only makes sense if we possibly could create
442 * entry.
444 Assert(create || created_entry == NULL);
445 pgstat_assert_is_up();
446 Assert(pgStatLocal.shared_hash != NULL);
447 Assert(!pgStatLocal.shmem->is_shutdown);
449 pgstat_setup_memcxt();
450 pgstat_setup_shared_refs();
452 if (created_entry != NULL)
453 *created_entry = false;
456 * Check if other backends dropped stats that could not be deleted because
457 * somebody held references to it. If so, check this backend's references.
458 * This is not expected to happen often. The location of the check is a
459 * bit random, but this is a relatively frequently called path, so better
460 * than most.
462 if (pgstat_need_entry_refs_gc())
463 pgstat_gc_entry_refs();
466 * First check the lookup cache hashtable in local memory. If we find a
467 * match here we can avoid taking locks / causing contention.
469 if (pgstat_get_entry_ref_cached(key, &entry_ref))
470 return entry_ref;
472 Assert(entry_ref != NULL);
475 * Do a lookup in the hash table first - it's quite likely that the entry
476 * already exists, and that way we only need a shared lock.
478 shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
480 if (create && !shhashent)
482 bool shfound;
485 * It's possible that somebody created the entry since the above
486 * lookup. If so, fall through to the same path as if we'd have if it
487 * already had been created before the dshash_find() calls.
489 shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
490 if (!shfound)
492 shheader = pgstat_init_entry(kind, shhashent);
493 pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
495 if (created_entry != NULL)
496 *created_entry = true;
498 return entry_ref;
502 if (!shhashent)
505 * If we're not creating, delete the reference again. In all
506 * likelihood it's just a stats lookup - no point wasting memory for a
507 * shared ref to nothing...
509 pgstat_release_entry_ref(key, entry_ref, false);
511 return NULL;
513 else
516 * Can get here either because dshash_find() found a match, or if
517 * dshash_find_or_insert() found a concurrently inserted entry.
520 if (shhashent->dropped && create)
523 * There are legitimate cases where the old stats entry might not
524 * yet have been dropped by the time it's reused. The most obvious
525 * case are replication slot stats, where a new slot can be
526 * created with the same index just after dropping. But oid
527 * wraparound can lead to other cases as well. We just reset the
528 * stats to their plain state.
530 shheader = pgstat_reinit_entry(kind, shhashent);
531 pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
533 if (created_entry != NULL)
534 *created_entry = true;
536 return entry_ref;
538 else if (shhashent->dropped)
540 dshash_release_lock(pgStatLocal.shared_hash, shhashent);
541 pgstat_release_entry_ref(key, entry_ref, false);
543 return NULL;
545 else
547 shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
548 pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
550 return entry_ref;
555 static void
556 pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
557 bool discard_pending)
559 if (entry_ref && entry_ref->pending)
561 if (discard_pending)
562 pgstat_delete_pending_entry(entry_ref);
563 else
564 elog(ERROR, "releasing ref with pending data");
567 if (entry_ref && entry_ref->shared_stats)
569 Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
570 Assert(entry_ref->pending == NULL);
573 * This can't race with another backend looking up the stats entry and
574 * increasing the refcount because it is not "legal" to create
575 * additional references to dropped entries.
577 if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
579 PgStatShared_HashEntry *shent;
582 * We're the last referrer to this entry, try to drop the shared
583 * entry.
586 /* only dropped entries can reach a 0 refcount */
587 Assert(entry_ref->shared_entry->dropped);
589 shent = dshash_find(pgStatLocal.shared_hash,
590 &entry_ref->shared_entry->key,
591 true);
592 if (!shent)
593 elog(ERROR, "could not find just referenced shared stats entry");
595 Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
596 Assert(entry_ref->shared_entry == shent);
598 pgstat_free_entry(shent, NULL);
602 if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
603 elog(ERROR, "entry ref vanished before deletion");
605 if (entry_ref)
606 pfree(entry_ref);
609 bool
610 pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
612 LWLock *lock = &entry_ref->shared_stats->lock;
614 if (nowait)
615 return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
617 LWLockAcquire(lock, LW_EXCLUSIVE);
618 return true;
622 * Separate from pgstat_lock_entry() as most callers will need to lock
623 * exclusively.
625 bool
626 pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
628 LWLock *lock = &entry_ref->shared_stats->lock;
630 if (nowait)
631 return LWLockConditionalAcquire(lock, LW_SHARED);
633 LWLockAcquire(lock, LW_SHARED);
634 return true;
637 void
638 pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
640 LWLockRelease(&entry_ref->shared_stats->lock);
644 * Helper function to fetch and lock shared stats.
646 PgStat_EntryRef *
647 pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, uint64 objid,
648 bool nowait)
650 PgStat_EntryRef *entry_ref;
652 /* find shared table stats entry corresponding to the local entry */
653 entry_ref = pgstat_get_entry_ref(kind, dboid, objid, true, NULL);
655 /* lock the shared entry to protect the content, skip if failed */
656 if (!pgstat_lock_entry(entry_ref, nowait))
657 return NULL;
659 return entry_ref;
662 void
663 pgstat_request_entry_refs_gc(void)
665 pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
668 static bool
669 pgstat_need_entry_refs_gc(void)
671 uint64 curage;
673 if (!pgStatEntryRefHash)
674 return false;
676 /* should have been initialized when creating pgStatEntryRefHash */
677 Assert(pgStatSharedRefAge != 0);
679 curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
681 return pgStatSharedRefAge != curage;
684 static void
685 pgstat_gc_entry_refs(void)
687 pgstat_entry_ref_hash_iterator i;
688 PgStat_EntryRefHashEntry *ent;
689 uint64 curage;
691 curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
692 Assert(curage != 0);
695 * Some entries have been dropped. Invalidate cache pointer to them.
697 pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
698 while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
700 PgStat_EntryRef *entry_ref = ent->entry_ref;
702 Assert(!entry_ref->shared_stats ||
703 entry_ref->shared_stats->magic == 0xdeadbeef);
705 if (!entry_ref->shared_entry->dropped)
706 continue;
708 /* cannot gc shared ref that has pending data */
709 if (entry_ref->pending != NULL)
710 continue;
712 pgstat_release_entry_ref(ent->key, entry_ref, false);
715 pgStatSharedRefAge = curage;
718 static void
719 pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
720 Datum match_data)
722 pgstat_entry_ref_hash_iterator i;
723 PgStat_EntryRefHashEntry *ent;
725 if (pgStatEntryRefHash == NULL)
726 return;
728 pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
730 while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
731 != NULL)
733 Assert(ent->entry_ref != NULL);
735 if (match && !match(ent, match_data))
736 continue;
738 pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
743 * Release all local references to shared stats entries.
745 * When a process exits it cannot do so while still holding references onto
746 * stats entries, otherwise the shared stats entries could never be freed.
748 static void
749 pgstat_release_all_entry_refs(bool discard_pending)
751 if (pgStatEntryRefHash == NULL)
752 return;
754 pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
755 Assert(pgStatEntryRefHash->members == 0);
756 pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
757 pgStatEntryRefHash = NULL;
760 static bool
761 match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
763 Oid dboid = DatumGetObjectId(match_data);
765 return ent->key.dboid == dboid;
768 static void
769 pgstat_release_db_entry_refs(Oid dboid)
771 pgstat_release_matching_entry_refs( /* discard pending = */ true,
772 match_db,
773 ObjectIdGetDatum(dboid));
/* ------------------------------------------------------------
 * Dropping and resetting of stats entries
 * ------------------------------------------------------------
 */
782 static void
783 pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
785 dsa_pointer pdsa;
788 * Fetch dsa pointer before deleting entry - that way we can free the
789 * memory after releasing the lock.
791 pdsa = shent->body;
793 if (!hstat)
794 dshash_delete_entry(pgStatLocal.shared_hash, shent);
795 else
796 dshash_delete_current(hstat);
798 dsa_free(pgStatLocal.dsa, pdsa);
802 * Helper for both pgstat_drop_database_and_contents() and
803 * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
804 * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
805 * case the entry needs to be already locked.
807 static bool
808 pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
809 dshash_seq_status *hstat)
811 Assert(shent->body != InvalidDsaPointer);
813 /* should already have released local reference */
814 if (pgStatEntryRefHash)
815 Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
818 * Signal that the entry is dropped - this will eventually cause other
819 * backends to release their references.
821 if (shent->dropped)
822 elog(ERROR,
823 "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%llu refcount=%u",
824 pgstat_get_kind_info(shent->key.kind)->name,
825 shent->key.dboid,
826 (unsigned long long) shent->key.objid,
827 pg_atomic_read_u32(&shent->refcount));
828 shent->dropped = true;
830 /* release refcount marking entry as not dropped */
831 if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
833 pgstat_free_entry(shent, hstat);
834 return true;
836 else
838 if (!hstat)
839 dshash_release_lock(pgStatLocal.shared_hash, shent);
840 return false;
845 * Drop stats for the database and all the objects inside that database.
847 static void
848 pgstat_drop_database_and_contents(Oid dboid)
850 dshash_seq_status hstat;
851 PgStatShared_HashEntry *p;
852 uint64 not_freed_count = 0;
854 Assert(OidIsValid(dboid));
856 Assert(pgStatLocal.shared_hash != NULL);
859 * This backend might very well be the only backend holding a reference to
860 * about-to-be-dropped entries. Ensure that we're not preventing it from
861 * being cleaned up till later.
863 * Doing this separately from the dshash iteration below avoids having to
864 * do so while holding a partition lock on the shared hashtable.
866 pgstat_release_db_entry_refs(dboid);
868 /* some of the dshash entries are to be removed, take exclusive lock. */
869 dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
870 while ((p = dshash_seq_next(&hstat)) != NULL)
872 if (p->dropped)
873 continue;
875 if (p->key.dboid != dboid)
876 continue;
878 if (!pgstat_drop_entry_internal(p, &hstat))
881 * Even statistics for a dropped database might currently be
882 * accessed (consider e.g. database stats for pg_stat_database).
884 not_freed_count++;
887 dshash_seq_term(&hstat);
890 * If some of the stats data could not be freed, signal the reference
891 * holders to run garbage collection of their cached pgStatLocal.shmem.
893 if (not_freed_count > 0)
894 pgstat_request_entry_refs_gc();
898 * Drop a single stats entry.
900 * This routine returns false if the stats entry of the dropped object could
901 * not be freed, true otherwise.
903 * The callers of this function should call pgstat_request_entry_refs_gc()
904 * if the stats entry could not be freed, to ensure that this entry's memory
905 * can be reclaimed later by a different backend calling
906 * pgstat_gc_entry_refs().
908 bool
909 pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
911 PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objid = objid};
912 PgStatShared_HashEntry *shent;
913 bool freed = true;
915 /* delete local reference */
916 if (pgStatEntryRefHash)
918 PgStat_EntryRefHashEntry *lohashent =
919 pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
921 if (lohashent)
922 pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
923 true);
926 /* mark entry in shared hashtable as deleted, drop if possible */
927 shent = dshash_find(pgStatLocal.shared_hash, &key, true);
928 if (shent)
930 freed = pgstat_drop_entry_internal(shent, NULL);
933 * Database stats contain other stats. Drop those as well when
934 * dropping the database. XXX: Perhaps this should be done in a
935 * slightly more principled way? But not obvious what that'd look
936 * like, and so far this is the only case...
938 if (key.kind == PGSTAT_KIND_DATABASE)
939 pgstat_drop_database_and_contents(key.dboid);
942 return freed;
945 void
946 pgstat_drop_all_entries(void)
948 dshash_seq_status hstat;
949 PgStatShared_HashEntry *ps;
950 uint64 not_freed_count = 0;
952 dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
953 while ((ps = dshash_seq_next(&hstat)) != NULL)
955 if (ps->dropped)
956 continue;
958 if (!pgstat_drop_entry_internal(ps, &hstat))
959 not_freed_count++;
961 dshash_seq_term(&hstat);
963 if (not_freed_count > 0)
964 pgstat_request_entry_refs_gc();
967 static void
968 shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
969 TimestampTz ts)
971 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
973 memset(pgstat_get_entry_data(kind, header), 0,
974 pgstat_get_entry_len(kind));
976 if (kind_info->reset_timestamp_cb)
977 kind_info->reset_timestamp_cb(header, ts);
981 * Reset one variable-numbered stats entry.
983 void
984 pgstat_reset_entry(PgStat_Kind kind, Oid dboid, uint64 objid, TimestampTz ts)
986 PgStat_EntryRef *entry_ref;
988 Assert(!pgstat_get_kind_info(kind)->fixed_amount);
990 entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
991 if (!entry_ref || entry_ref->shared_entry->dropped)
992 return;
994 (void) pgstat_lock_entry(entry_ref, false);
995 shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
996 pgstat_unlock_entry(entry_ref);
1000 * Scan through the shared hashtable of stats, resetting statistics if
1001 * approved by the provided do_reset() function.
1003 void
1004 pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
1005 Datum match_data, TimestampTz ts)
1007 dshash_seq_status hstat;
1008 PgStatShared_HashEntry *p;
1010 /* dshash entry is not modified, take shared lock */
1011 dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1012 while ((p = dshash_seq_next(&hstat)) != NULL)
1014 PgStatShared_Common *header;
1016 if (p->dropped)
1017 continue;
1019 if (!do_reset(p, match_data))
1020 continue;
1022 header = dsa_get_address(pgStatLocal.dsa, p->body);
1024 LWLockAcquire(&header->lock, LW_EXCLUSIVE);
1026 shared_stat_reset_contents(p->key.kind, header, ts);
1028 LWLockRelease(&header->lock);
1030 dshash_seq_term(&hstat);
1033 static bool
1034 match_kind(PgStatShared_HashEntry *p, Datum match_data)
1036 return p->key.kind == DatumGetInt32(match_data);
1039 void
1040 pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
1042 pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
1045 static void
1046 pgstat_setup_memcxt(void)
1048 if (unlikely(!pgStatSharedRefContext))
1049 pgStatSharedRefContext =
1050 AllocSetContextCreate(TopMemoryContext,
1051 "PgStat Shared Ref",
1052 ALLOCSET_SMALL_SIZES);
1053 if (unlikely(!pgStatEntryRefHashContext))
1054 pgStatEntryRefHashContext =
1055 AllocSetContextCreate(TopMemoryContext,
1056 "PgStat Shared Ref Hash",
1057 ALLOCSET_SMALL_SIZES);