Clear padding of PgStat_HashKey when handling pgstats entries
[pgsql.git] / src / backend / utils / activity / pgstat.c
blobea8c5691e87501704db6e04d389f9cb6c4be8887
1 /* ----------
2 * pgstat.c
3 * Infrastructure for the cumulative statistics system.
5 * The cumulative statistics system accumulates statistics for different kinds
6 * of objects. Some kinds of statistics are collected for a fixed number of
7 * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 * statistics are collected for a varying number of objects
9 * (e.g. relations). See PgStat_KindInfo for a list of currently handled
10 * statistics.
12 * Statistics are loaded from the filesystem during startup (by the startup
13 * process), unless preceded by a crash, in which case all stats are
14 * discarded. They are written out by the checkpointer process just before
15 * shutting down, except when shutting down in immediate mode.
17 * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
19 * Statistics for variable-numbered objects are stored in dynamic shared
20 * memory and can be found via a dshash hashtable. The statistics counters are
21 * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
22 * separately allocated (PgStatShared_HashEntry->body). The separate
23 * allocation allows different kinds of statistics to be stored in the same
24 * hashtable without wasting space in PgStatShared_HashEntry.
26 * Variable-numbered stats are addressed by PgStat_HashKey while running. It
27 * is not possible to have statistics for an object that cannot be addressed
28 * that way at runtime. A wider identifier can be used when serializing to
29 * disk (used for replication slot stats).
31 * To avoid contention on the shared hashtable, each backend has a
32 * backend-local hashtable (pgStatEntryRefHash) in front of the shared
33 * hashtable, containing references (PgStat_EntryRef) to shared hashtable
34 * entries. The shared hashtable only needs to be accessed when no prior
35 * reference is found in the local hashtable. Besides pointing to the
36 * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
37 * contains a pointer to the shared statistics data, as a process-local
38 * address, to reduce access costs.
40 * The names for structs stored in shared memory are prefixed with
41 * PgStatShared instead of PgStat. Each stats entry in shared memory is
42 * protected by a dedicated lwlock.
44 * Most stats updates are first accumulated locally in each process as pending
45 * entries, then later flushed to shared memory (just after commit, or by
46 * idle-timeout). This practically eliminates contention on individual stats
47 * entries. For most kinds of variable-numbered pending stats data is stored
48 * in PgStat_EntryRef->pending. All entries with pending data are in the
49 * pgStatPending list. Pending statistics updates are flushed out by
50 * pgstat_report_stat().
52 * It is possible for external modules to define custom statistics kinds,
53 * that can use the same properties as any built-in stats kinds. Each custom
54 * stats kind needs to assign a unique ID to ensure that it does not overlap
55 * with other extensions. In order to reserve a unique stats kind ID, refer
56 * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
58 * The behavior of different kinds of statistics is determined by the kind's
59 * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
60 * defined, and pgstat_kind_custom_infos for custom kinds registered at
61 * startup by pgstat_register_kind(). See PgStat_KindInfo for details.
63 * The consistency of read accesses to statistics can be configured using the
64 * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
65 * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
66 * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
67 * pgStatLocal.snapshot.
69 * To keep things manageable, stats handling is split across several
70 * files. Infrastructure pieces are in:
71 * - pgstat.c - this file, to tie it all together
72 * - pgstat_shmem.c - nearly everything dealing with shared memory, including
73 * the maintenance of hashtable entries
74 * - pgstat_xact.c - transactional integration, including the transactional
75 * creation and dropping of stats entries
77 * Each statistics kind is handled in a dedicated file:
78 * - pgstat_archiver.c
79 * - pgstat_bgwriter.c
80 * - pgstat_checkpointer.c
81 * - pgstat_database.c
82 * - pgstat_function.c
83 * - pgstat_io.c
84 * - pgstat_relation.c
85 * - pgstat_replslot.c
86 * - pgstat_slru.c
87 * - pgstat_subscription.c
88 * - pgstat_wal.c
90 * Whenever possible infrastructure files should not contain code related to
91 * specific kinds of stats.
94 * Copyright (c) 2001-2024, PostgreSQL Global Development Group
96 * IDENTIFICATION
97 * src/backend/utils/activity/pgstat.c
98 * ----------
100 #include "postgres.h"
102 #include <unistd.h>
104 #include "access/xact.h"
105 #include "access/xlog.h"
106 #include "lib/dshash.h"
107 #include "pgstat.h"
108 #include "storage/fd.h"
109 #include "storage/ipc.h"
110 #include "storage/lwlock.h"
111 #include "utils/guc_hooks.h"
112 #include "utils/memutils.h"
113 #include "utils/pgstat_internal.h"
114 #include "utils/timestamp.h"
117 /* ----------
118 * Timer definitions.
120 * In milliseconds.
121 * ----------
/* minimum interval between two non-forced stats flushes */
#define PGSTAT_MIN_INTERVAL			1000
/* how long stats may stay pending before a flush is forced (and may block) */
#define PGSTAT_MAX_INTERVAL			60000
/* when to call pgstat_report_stat() again, even when idle */
#define PGSTAT_IDLE_INTERVAL		10000

/* ----------
 * Initial size hints for the hash tables used in statistics.
 * ----------
 */

#define PGSTAT_SNAPSHOT_HASH_SIZE	512

/* ---------
 * Identifiers in stats file.
 * ---------
 */
#define PGSTAT_FILE_ENTRY_END	'E' /* end of file */
#define PGSTAT_FILE_ENTRY_FIXED	'F' /* fixed-numbered stats entry */
#define PGSTAT_FILE_ENTRY_NAME	'N' /* stats entry identified by name */
#define PGSTAT_FILE_ENTRY_HASH	'S' /* stats entry identified by
									 * PgStat_HashKey */
148 /* hash table for statistics snapshots entry */
149 typedef struct PgStat_SnapshotEntry
151 PgStat_HashKey key;
152 char status; /* for simplehash use */
153 void *data; /* the stats data itself */
154 } PgStat_SnapshotEntry;
157 /* ----------
158 * Backend-local Hash Table Definitions
159 * ----------
162 /* for stats snapshot entries */
163 #define SH_PREFIX pgstat_snapshot
164 #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
165 #define SH_KEY_TYPE PgStat_HashKey
166 #define SH_KEY key
167 #define SH_HASH_KEY(tb, key) \
168 pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
169 #define SH_EQUAL(tb, a, b) \
170 pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
171 #define SH_SCOPE static inline
172 #define SH_DEFINE
173 #define SH_DECLARE
174 #include "lib/simplehash.h"
177 /* ----------
178 * Local function forward declarations
179 * ----------
182 static void pgstat_write_statsfile(XLogRecPtr redo);
183 static void pgstat_read_statsfile(XLogRecPtr redo);
185 static void pgstat_init_snapshot_fixed(void);
187 static void pgstat_reset_after_failure(void);
189 static bool pgstat_flush_pending_entries(bool nowait);
191 static void pgstat_prep_snapshot(void);
192 static void pgstat_build_snapshot(void);
193 static void pgstat_build_snapshot_fixed(PgStat_Kind kind);
195 static inline bool pgstat_is_kind_valid(PgStat_Kind kind);
198 /* ----------
199 * GUC parameters
200 * ----------
203 bool pgstat_track_counts = false;
204 int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
207 /* ----------
208 * state shared with pgstat_*.c
209 * ----------
212 PgStat_LocalState pgStatLocal;
215 /* ----------
216 * Local data
218 * NB: There should be only variables related to stats infrastructure here,
219 * not for specific kinds of stats.
220 * ----------
224 * Memory contexts containing the pgStatEntryRefHash table, the
225 * pgStatSharedRef entries, and pending data respectively. Mostly to make it
226 * easier to track / attribute memory usage.
229 static MemoryContext pgStatPendingContext = NULL;
232 * Backend local list of PgStat_EntryRef with unflushed pending stats.
234 * Newly pending entries should only ever be added to the end of the list,
235 * otherwise pgstat_flush_pending_entries() might not see them immediately.
237 static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
241 * Force the next stats flush to happen regardless of
242 * PGSTAT_MIN_INTERVAL. Useful in test scripts.
244 static bool pgStatForceNextFlush = false;
247 * Force-clear existing snapshot before next use when stats_fetch_consistency
248 * is changed.
250 static bool force_stats_snapshot_clear = false;
254 * For assertions that check pgstat is not used before initialization / after
255 * shutdown.
257 #ifdef USE_ASSERT_CHECKING
258 static bool pgstat_is_initialized = false;
259 static bool pgstat_is_shutdown = false;
260 #endif
264 * The different kinds of built-in statistics.
266 * If reasonably possible, handling specific to one kind of stats should go
267 * through this abstraction, rather than making more of pgstat.c aware.
269 * See comments for struct PgStat_KindInfo for details about the individual
270 * fields.
272 * XXX: It'd be nicer to define this outside of this file. But there doesn't
273 * seem to be a great way of doing that, given the split across multiple
274 * files.
276 static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] = {
278 /* stats kinds for variable-numbered objects */
280 [PGSTAT_KIND_DATABASE] = {
281 .name = "database",
283 .fixed_amount = false,
284 /* so pg_stat_database entries can be seen in all databases */
285 .accessed_across_databases = true,
287 .shared_size = sizeof(PgStatShared_Database),
288 .shared_data_off = offsetof(PgStatShared_Database, stats),
289 .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
290 .pending_size = sizeof(PgStat_StatDBEntry),
292 .flush_pending_cb = pgstat_database_flush_cb,
293 .reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
296 [PGSTAT_KIND_RELATION] = {
297 .name = "relation",
299 .fixed_amount = false,
301 .shared_size = sizeof(PgStatShared_Relation),
302 .shared_data_off = offsetof(PgStatShared_Relation, stats),
303 .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
304 .pending_size = sizeof(PgStat_TableStatus),
306 .flush_pending_cb = pgstat_relation_flush_cb,
307 .delete_pending_cb = pgstat_relation_delete_pending_cb,
310 [PGSTAT_KIND_FUNCTION] = {
311 .name = "function",
313 .fixed_amount = false,
315 .shared_size = sizeof(PgStatShared_Function),
316 .shared_data_off = offsetof(PgStatShared_Function, stats),
317 .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
318 .pending_size = sizeof(PgStat_FunctionCounts),
320 .flush_pending_cb = pgstat_function_flush_cb,
323 [PGSTAT_KIND_REPLSLOT] = {
324 .name = "replslot",
326 .fixed_amount = false,
328 .accessed_across_databases = true,
330 .shared_size = sizeof(PgStatShared_ReplSlot),
331 .shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
332 .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),
334 .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
335 .to_serialized_name = pgstat_replslot_to_serialized_name_cb,
336 .from_serialized_name = pgstat_replslot_from_serialized_name_cb,
339 [PGSTAT_KIND_SUBSCRIPTION] = {
340 .name = "subscription",
342 .fixed_amount = false,
343 /* so pg_stat_subscription_stats entries can be seen in all databases */
344 .accessed_across_databases = true,
346 .shared_size = sizeof(PgStatShared_Subscription),
347 .shared_data_off = offsetof(PgStatShared_Subscription, stats),
348 .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
349 .pending_size = sizeof(PgStat_BackendSubEntry),
351 .flush_pending_cb = pgstat_subscription_flush_cb,
352 .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
356 /* stats for fixed-numbered (mostly 1) objects */
358 [PGSTAT_KIND_ARCHIVER] = {
359 .name = "archiver",
361 .fixed_amount = true,
363 .snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
364 .shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
365 .shared_data_off = offsetof(PgStatShared_Archiver, stats),
366 .shared_data_len = sizeof(((PgStatShared_Archiver *) 0)->stats),
368 .init_shmem_cb = pgstat_archiver_init_shmem_cb,
369 .reset_all_cb = pgstat_archiver_reset_all_cb,
370 .snapshot_cb = pgstat_archiver_snapshot_cb,
373 [PGSTAT_KIND_BGWRITER] = {
374 .name = "bgwriter",
376 .fixed_amount = true,
378 .snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
379 .shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
380 .shared_data_off = offsetof(PgStatShared_BgWriter, stats),
381 .shared_data_len = sizeof(((PgStatShared_BgWriter *) 0)->stats),
383 .init_shmem_cb = pgstat_bgwriter_init_shmem_cb,
384 .reset_all_cb = pgstat_bgwriter_reset_all_cb,
385 .snapshot_cb = pgstat_bgwriter_snapshot_cb,
388 [PGSTAT_KIND_CHECKPOINTER] = {
389 .name = "checkpointer",
391 .fixed_amount = true,
393 .snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
394 .shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
395 .shared_data_off = offsetof(PgStatShared_Checkpointer, stats),
396 .shared_data_len = sizeof(((PgStatShared_Checkpointer *) 0)->stats),
398 .init_shmem_cb = pgstat_checkpointer_init_shmem_cb,
399 .reset_all_cb = pgstat_checkpointer_reset_all_cb,
400 .snapshot_cb = pgstat_checkpointer_snapshot_cb,
403 [PGSTAT_KIND_IO] = {
404 .name = "io",
406 .fixed_amount = true,
408 .snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
409 .shared_ctl_off = offsetof(PgStat_ShmemControl, io),
410 .shared_data_off = offsetof(PgStatShared_IO, stats),
411 .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
413 .flush_fixed_cb = pgstat_io_flush_cb,
414 .have_fixed_pending_cb = pgstat_io_have_pending_cb,
415 .init_shmem_cb = pgstat_io_init_shmem_cb,
416 .reset_all_cb = pgstat_io_reset_all_cb,
417 .snapshot_cb = pgstat_io_snapshot_cb,
420 [PGSTAT_KIND_SLRU] = {
421 .name = "slru",
423 .fixed_amount = true,
425 .snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
426 .shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
427 .shared_data_off = offsetof(PgStatShared_SLRU, stats),
428 .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
430 .flush_fixed_cb = pgstat_slru_flush_cb,
431 .have_fixed_pending_cb = pgstat_slru_have_pending_cb,
432 .init_shmem_cb = pgstat_slru_init_shmem_cb,
433 .reset_all_cb = pgstat_slru_reset_all_cb,
434 .snapshot_cb = pgstat_slru_snapshot_cb,
437 [PGSTAT_KIND_WAL] = {
438 .name = "wal",
440 .fixed_amount = true,
442 .snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
443 .shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
444 .shared_data_off = offsetof(PgStatShared_Wal, stats),
445 .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
447 .init_backend_cb = pgstat_wal_init_backend_cb,
448 .flush_fixed_cb = pgstat_wal_flush_cb,
449 .have_fixed_pending_cb = pgstat_wal_have_pending_cb,
450 .init_shmem_cb = pgstat_wal_init_shmem_cb,
451 .reset_all_cb = pgstat_wal_reset_all_cb,
452 .snapshot_cb = pgstat_wal_snapshot_cb,
457 * Information about custom statistics kinds.
459 * These are saved in a different array than the built-in kinds, to keep
460 * the initializations clear.
462 * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
464 static const PgStat_KindInfo **pgstat_kind_custom_infos = NULL;
466 /* ------------------------------------------------------------
467 * Functions managing the state of the stats system for all backends.
468 * ------------------------------------------------------------
472 * Read on-disk stats into memory at server start.
474 * Should only be called by the startup process or in single user mode.
476 void
477 pgstat_restore_stats(XLogRecPtr redo)
479 pgstat_read_statsfile(redo);
483 * Remove the stats file. This is currently used only if WAL recovery is
484 * needed after a crash.
486 * Should only be called by the startup process or in single user mode.
488 void
489 pgstat_discard_stats(void)
491 int ret;
493 /* NB: this needs to be done even in single user mode */
495 ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
496 if (ret != 0)
498 if (errno == ENOENT)
499 elog(DEBUG2,
500 "didn't need to unlink permanent stats file \"%s\" - didn't exist",
501 PGSTAT_STAT_PERMANENT_FILENAME);
502 else
503 ereport(LOG,
504 (errcode_for_file_access(),
505 errmsg("could not unlink permanent statistics file \"%s\": %m",
506 PGSTAT_STAT_PERMANENT_FILENAME)));
508 else
510 ereport(DEBUG2,
511 (errcode_for_file_access(),
512 errmsg_internal("unlinked permanent statistics file \"%s\"",
513 PGSTAT_STAT_PERMANENT_FILENAME)));
517 * Reset stats contents. This will set reset timestamps of fixed-numbered
518 * stats to the current time (no variable stats exist).
520 pgstat_reset_after_failure();
524 * pgstat_before_server_shutdown() needs to be called by exactly one process
525 * during regular server shutdowns. Otherwise all stats will be lost.
527 * We currently only write out stats for proc_exit(0). We might want to change
528 * that at some point... But right now pgstat_discard_stats() would be called
529 * during the start after a disorderly shutdown, anyway.
531 void
532 pgstat_before_server_shutdown(int code, Datum arg)
534 Assert(pgStatLocal.shmem != NULL);
535 Assert(!pgStatLocal.shmem->is_shutdown);
538 * Stats should only be reported after pgstat_initialize() and before
539 * pgstat_shutdown(). This is a convenient point to catch most violations
540 * of this rule.
542 Assert(pgstat_is_initialized && !pgstat_is_shutdown);
544 /* flush out our own pending changes before writing out */
545 pgstat_report_stat(true);
548 * Only write out file during normal shutdown. Don't even signal that
549 * we've shutdown during irregular shutdowns, because the shutdown
550 * sequence isn't coordinated to ensure this backend shuts down last.
552 if (code == 0)
554 pgStatLocal.shmem->is_shutdown = true;
555 pgstat_write_statsfile(GetRedoRecPtr());
560 /* ------------------------------------------------------------
561 * Backend initialization / shutdown functions
562 * ------------------------------------------------------------
566 * Shut down a single backend's statistics reporting at process exit.
568 * Flush out any remaining statistics counts. Without this, operations
569 * triggered during backend exit (such as temp table deletions) won't be
570 * counted.
572 static void
573 pgstat_shutdown_hook(int code, Datum arg)
575 Assert(!pgstat_is_shutdown);
576 Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
579 * If we got as far as discovering our own database ID, we can flush out
580 * what we did so far. Otherwise, we'd be reporting an invalid database
581 * ID, so forget it. (This means that accesses to pg_database during
582 * failed backend starts might never get counted.)
584 if (OidIsValid(MyDatabaseId))
585 pgstat_report_disconnect(MyDatabaseId);
587 pgstat_report_stat(true);
589 /* there shouldn't be any pending changes left */
590 Assert(dlist_is_empty(&pgStatPending));
591 dlist_init(&pgStatPending);
593 pgstat_detach_shmem();
595 #ifdef USE_ASSERT_CHECKING
596 pgstat_is_shutdown = true;
597 #endif
601 * Initialize pgstats state, and set up our on-proc-exit hook. Called from
602 * BaseInit().
604 * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
606 void
607 pgstat_initialize(void)
609 Assert(!pgstat_is_initialized);
611 pgstat_attach_shmem();
613 pgstat_init_snapshot_fixed();
615 /* Backend initialization callbacks */
616 for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
618 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
620 if (kind_info == NULL || kind_info->init_backend_cb == NULL)
621 continue;
623 kind_info->init_backend_cb();
626 /* Set up a process-exit hook to clean up */
627 before_shmem_exit(pgstat_shutdown_hook, 0);
629 #ifdef USE_ASSERT_CHECKING
630 pgstat_is_initialized = true;
631 #endif
635 /* ------------------------------------------------------------
636 * Public functions used by backends follow
637 * ------------------------------------------------------------
641 * Must be called by processes that perform DML: tcop/postgres.c, logical
642 * receiver processes, SPI worker, etc. to flush pending statistics updates to
643 * shared memory.
645 * Unless called with 'force', flushes of pending stats updates happen at
646 * most once per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
647 * block on lock acquisition, except if stats updates have been pending for
648 * longer than PGSTAT_MAX_INTERVAL (60000ms).
650 * Whenever pending stats updates remain at the end of pgstat_report_stat() a
651 * suggested idle timeout is returned. Currently this is always
652 * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
653 * a timeout after which to call pgstat_report_stat(true), but are not
654 * required to do so.
656 * Note that this is called only when not within a transaction, so it is fair
657 * to use transaction stop time as an approximation of current time.
659 long
660 pgstat_report_stat(bool force)
662 static TimestampTz pending_since = 0;
663 static TimestampTz last_flush = 0;
664 bool partial_flush;
665 TimestampTz now;
666 bool nowait;
668 pgstat_assert_is_up();
669 Assert(!IsTransactionOrTransactionBlock());
671 /* "absorb" the forced flush even if there's nothing to flush */
672 if (pgStatForceNextFlush)
674 force = true;
675 pgStatForceNextFlush = false;
678 /* Don't expend a clock check if nothing to do */
679 if (dlist_is_empty(&pgStatPending))
681 bool do_flush = false;
683 /* Check for pending fixed-numbered stats */
684 for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
686 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
688 if (!kind_info)
689 continue;
690 if (!kind_info->fixed_amount)
692 Assert(kind_info->have_fixed_pending_cb == NULL);
693 continue;
695 if (!kind_info->have_fixed_pending_cb)
696 continue;
698 if (kind_info->have_fixed_pending_cb())
700 do_flush = true;
701 break;
705 if (!do_flush)
707 Assert(pending_since == 0);
708 return 0;
713 * There should never be stats to report once stats are shut down. Can't
714 * assert that before the checks above, as there is an unconditional
715 * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
716 * the process that ran pgstat_before_server_shutdown() will still call.
718 Assert(!pgStatLocal.shmem->is_shutdown);
720 if (force)
723 * Stats reports are forced either when it's been too long since stats
724 * have been reported or in processes that force stats reporting to
725 * happen at specific points (including shutdown). In the former case
726 * the transaction stop time might be quite old, in the latter it
727 * would never get cleared.
729 now = GetCurrentTimestamp();
731 else
733 now = GetCurrentTransactionStopTimestamp();
735 if (pending_since > 0 &&
736 TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
738 /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
739 force = true;
741 else if (last_flush > 0 &&
742 !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
744 /* don't flush too frequently */
745 if (pending_since == 0)
746 pending_since = now;
748 return PGSTAT_IDLE_INTERVAL;
752 pgstat_update_dbstats(now);
754 /* don't wait for lock acquisition when !force */
755 nowait = !force;
757 partial_flush = false;
759 /* flush database / relation / function / ... stats */
760 partial_flush |= pgstat_flush_pending_entries(nowait);
762 /* flush of fixed-numbered stats */
763 for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
765 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
767 if (!kind_info)
768 continue;
769 if (!kind_info->fixed_amount)
771 Assert(kind_info->flush_fixed_cb == NULL);
772 continue;
774 if (!kind_info->flush_fixed_cb)
775 continue;
777 partial_flush |= kind_info->flush_fixed_cb(nowait);
780 last_flush = now;
783 * If some of the pending stats could not be flushed due to lock
784 * contention, let the caller know when to retry.
786 if (partial_flush)
788 /* force should have prevented us from getting here */
789 Assert(!force);
791 /* remember since when stats have been pending */
792 if (pending_since == 0)
793 pending_since = now;
795 return PGSTAT_IDLE_INTERVAL;
798 pending_since = 0;
800 return 0;
804 * Force locally pending stats to be flushed during the next
805 * pgstat_report_stat() call. This is useful for writing tests.
807 void
808 pgstat_force_next_flush(void)
810 pgStatForceNextFlush = true;
814 * Only for use by pgstat_reset_counters()
816 static bool
817 match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
819 return entry->key.dboid == DatumGetObjectId(MyDatabaseId);
823 * Reset counters for our database.
825 * Permission checking for this function is managed through the normal
826 * GRANT system.
828 void
829 pgstat_reset_counters(void)
831 TimestampTz ts = GetCurrentTimestamp();
833 pgstat_reset_matching_entries(match_db_entries,
834 ObjectIdGetDatum(MyDatabaseId),
835 ts);
839 * Reset a single variable-numbered entry.
841 * If the stats kind is within a database, also reset the database's
842 * stat_reset_timestamp.
844 * Permission checking for this function is managed through the normal
845 * GRANT system.
847 void
848 pgstat_reset(PgStat_Kind kind, Oid dboid, uint64 objid)
850 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
851 TimestampTz ts = GetCurrentTimestamp();
853 /* not needed atm, and doesn't make sense with the current signature */
854 Assert(!pgstat_get_kind_info(kind)->fixed_amount);
856 /* reset the "single counter" */
857 pgstat_reset_entry(kind, dboid, objid, ts);
859 if (!kind_info->accessed_across_databases)
860 pgstat_reset_database_timestamp(dboid, ts);
864 * Reset stats for all entries of a kind.
866 * Permission checking for this function is managed through the normal
867 * GRANT system.
869 void
870 pgstat_reset_of_kind(PgStat_Kind kind)
872 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
873 TimestampTz ts = GetCurrentTimestamp();
875 if (kind_info->fixed_amount)
876 kind_info->reset_all_cb(ts);
877 else
878 pgstat_reset_entries_of_kind(kind, ts);
882 /* ------------------------------------------------------------
883 * Fetching of stats
884 * ------------------------------------------------------------
888 * Discard any data collected in the current transaction. Any subsequent
889 * request will cause new snapshots to be read.
891 * This is also invoked during transaction commit or abort to discard
892 * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
893 * cause this routine to be called.
895 void
896 pgstat_clear_snapshot(void)
898 pgstat_assert_is_up();
900 memset(&pgStatLocal.snapshot.fixed_valid, 0,
901 sizeof(pgStatLocal.snapshot.fixed_valid));
902 memset(&pgStatLocal.snapshot.custom_valid, 0,
903 sizeof(pgStatLocal.snapshot.custom_valid));
904 pgStatLocal.snapshot.stats = NULL;
905 pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;
907 /* Release memory, if any was allocated */
908 if (pgStatLocal.snapshot.context)
910 MemoryContextDelete(pgStatLocal.snapshot.context);
912 /* Reset variables */
913 pgStatLocal.snapshot.context = NULL;
917 * Historically the backend_status.c facilities lived in this file, and
918 * were reset with the same function. For now keep it that way, and
919 * forward the reset request.
921 pgstat_clear_backend_activity_snapshot();
923 /* Reset this flag, as it may be possible that a cleanup was forced. */
924 force_stats_snapshot_clear = false;
927 void *
928 pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
930 PgStat_HashKey key;
931 PgStat_EntryRef *entry_ref;
932 void *stats_data;
933 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
935 /* should be called from backends */
936 Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
937 Assert(!kind_info->fixed_amount);
939 pgstat_prep_snapshot();
941 /* clear padding */
942 memset(&key, 0, sizeof(struct PgStat_HashKey));
944 key.kind = kind;
945 key.dboid = dboid;
946 key.objid = objid;
948 /* if we need to build a full snapshot, do so */
949 if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
950 pgstat_build_snapshot();
952 /* if caching is desired, look up in cache */
953 if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
955 PgStat_SnapshotEntry *entry = NULL;
957 entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
959 if (entry)
960 return entry->data;
963 * If we built a full snapshot and the key is not in
964 * pgStatLocal.snapshot.stats, there are no matching stats.
966 if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
967 return NULL;
970 pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
972 entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
974 if (entry_ref == NULL || entry_ref->shared_entry->dropped)
976 /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
977 if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
979 PgStat_SnapshotEntry *entry = NULL;
980 bool found;
982 entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
983 Assert(!found);
984 entry->data = NULL;
986 return NULL;
990 * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
991 * otherwise we could quickly end up with a fair bit of memory used due to
992 * repeated accesses.
994 if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
995 stats_data = palloc(kind_info->shared_data_len);
996 else
997 stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
998 kind_info->shared_data_len);
1000 pgstat_lock_entry_shared(entry_ref, false);
1001 memcpy(stats_data,
1002 pgstat_get_entry_data(kind, entry_ref->shared_stats),
1003 kind_info->shared_data_len);
1004 pgstat_unlock_entry(entry_ref);
1006 if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
1008 PgStat_SnapshotEntry *entry = NULL;
1009 bool found;
1011 entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
1012 entry->data = stats_data;
1015 return stats_data;
1019 * If a stats snapshot has been taken, return the timestamp at which that was
1020 * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
1021 * false.
1023 TimestampTz
1024 pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
1026 if (force_stats_snapshot_clear)
1027 pgstat_clear_snapshot();
1029 if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1031 *have_snapshot = true;
1032 return pgStatLocal.snapshot.snapshot_timestamp;
1035 *have_snapshot = false;
1037 return 0;
1040 bool
1041 pgstat_have_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1043 /* fixed-numbered stats always exist */
1044 if (pgstat_get_kind_info(kind)->fixed_amount)
1045 return true;
1047 return pgstat_get_entry_ref(kind, dboid, objid, false, NULL) != NULL;
1051 * Ensure snapshot for fixed-numbered 'kind' exists.
1053 * Typically used by the pgstat_fetch_* functions for a kind of stats, before
1054 * massaging the data into the desired format.
1056 void
1057 pgstat_snapshot_fixed(PgStat_Kind kind)
1059 Assert(pgstat_is_kind_valid(kind));
1060 Assert(pgstat_get_kind_info(kind)->fixed_amount);
1062 if (force_stats_snapshot_clear)
1063 pgstat_clear_snapshot();
1065 if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1066 pgstat_build_snapshot();
1067 else
1068 pgstat_build_snapshot_fixed(kind);
1070 if (pgstat_is_kind_builtin(kind))
1071 Assert(pgStatLocal.snapshot.fixed_valid[kind]);
1072 else if (pgstat_is_kind_custom(kind))
1073 Assert(pgStatLocal.snapshot.custom_valid[kind - PGSTAT_KIND_CUSTOM_MIN]);
1076 static void
1077 pgstat_init_snapshot_fixed(void)
1080 * Initialize fixed-numbered statistics data in snapshots, only for custom
1081 * stats kinds.
1083 for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1085 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1087 if (!kind_info || !kind_info->fixed_amount)
1088 continue;
1090 pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN] =
1091 MemoryContextAlloc(TopMemoryContext, kind_info->shared_data_len);
1095 static void
1096 pgstat_prep_snapshot(void)
1098 if (force_stats_snapshot_clear)
1099 pgstat_clear_snapshot();
1101 if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
1102 pgStatLocal.snapshot.stats != NULL)
1103 return;
1105 if (!pgStatLocal.snapshot.context)
1106 pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
1107 "PgStat Snapshot",
1108 ALLOCSET_SMALL_SIZES);
1110 pgStatLocal.snapshot.stats =
1111 pgstat_snapshot_create(pgStatLocal.snapshot.context,
1112 PGSTAT_SNAPSHOT_HASH_SIZE,
1113 NULL);
1116 static void
1117 pgstat_build_snapshot(void)
1119 dshash_seq_status hstat;
1120 PgStatShared_HashEntry *p;
1122 /* should only be called when we need a snapshot */
1123 Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);
1125 /* snapshot already built */
1126 if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
1127 return;
1129 pgstat_prep_snapshot();
1131 Assert(pgStatLocal.snapshot.stats->members == 0);
1133 pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();
1136 * Snapshot all variable stats.
1138 dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1139 while ((p = dshash_seq_next(&hstat)) != NULL)
1141 PgStat_Kind kind = p->key.kind;
1142 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1143 bool found;
1144 PgStat_SnapshotEntry *entry;
1145 PgStatShared_Common *stats_data;
1148 * Check if the stats object should be included in the snapshot.
1149 * Unless the stats kind can be accessed from all databases (e.g.,
1150 * database stats themselves), we only include stats for the current
1151 * database or objects not associated with a database (e.g. shared
1152 * relations).
1154 if (p->key.dboid != MyDatabaseId &&
1155 p->key.dboid != InvalidOid &&
1156 !kind_info->accessed_across_databases)
1157 continue;
1159 if (p->dropped)
1160 continue;
1162 Assert(pg_atomic_read_u32(&p->refcount) > 0);
1164 stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
1165 Assert(stats_data);
1167 entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
1168 Assert(!found);
1170 entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
1171 kind_info->shared_size);
1174 * Acquire the LWLock directly instead of using
1175 * pg_stat_lock_entry_shared() which requires a reference.
1177 LWLockAcquire(&stats_data->lock, LW_SHARED);
1178 memcpy(entry->data,
1179 pgstat_get_entry_data(kind, stats_data),
1180 kind_info->shared_size);
1181 LWLockRelease(&stats_data->lock);
1183 dshash_seq_term(&hstat);
1186 * Build snapshot of all fixed-numbered stats.
1188 for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1190 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1192 if (!kind_info)
1193 continue;
1194 if (!kind_info->fixed_amount)
1196 Assert(kind_info->snapshot_cb == NULL);
1197 continue;
1200 pgstat_build_snapshot_fixed(kind);
1203 pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
1206 static void
1207 pgstat_build_snapshot_fixed(PgStat_Kind kind)
1209 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1210 int idx;
1211 bool *valid;
1213 /* Position in fixed_valid or custom_valid */
1214 if (pgstat_is_kind_builtin(kind))
1216 idx = kind;
1217 valid = pgStatLocal.snapshot.fixed_valid;
1219 else
1221 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1222 valid = pgStatLocal.snapshot.custom_valid;
1225 Assert(kind_info->fixed_amount);
1226 Assert(kind_info->snapshot_cb != NULL);
1228 if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
1230 /* rebuild every time */
1231 valid[idx] = false;
1233 else if (valid[idx])
1235 /* in snapshot mode we shouldn't get called again */
1236 Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
1237 return;
1240 Assert(!valid[idx]);
1242 kind_info->snapshot_cb();
1244 Assert(!valid[idx]);
1245 valid[idx] = true;
/* ------------------------------------------------------------
 * Backend-local pending stats infrastructure
 * ------------------------------------------------------------
 */
1255 * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1256 * stats if not already done.
1258 * If created_entry is non-NULL, it'll be set to true if the entry is newly
1259 * created, false otherwise.
1261 PgStat_EntryRef *
1262 pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid, bool *created_entry)
1264 PgStat_EntryRef *entry_ref;
1266 /* need to be able to flush out */
1267 Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);
1269 if (unlikely(!pgStatPendingContext))
1271 pgStatPendingContext =
1272 AllocSetContextCreate(TopMemoryContext,
1273 "PgStat Pending",
1274 ALLOCSET_SMALL_SIZES);
1277 entry_ref = pgstat_get_entry_ref(kind, dboid, objid,
1278 true, created_entry);
1280 if (entry_ref->pending == NULL)
1282 size_t entrysize = pgstat_get_kind_info(kind)->pending_size;
1284 Assert(entrysize != (size_t) -1);
1286 entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
1287 dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
1290 return entry_ref;
1294 * Return an existing stats entry, or NULL.
1296 * This should only be used for helper function for pgstatfuncs.c - outside of
1297 * that it shouldn't be needed.
1299 PgStat_EntryRef *
1300 pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
1302 PgStat_EntryRef *entry_ref;
1304 entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL);
1306 if (entry_ref == NULL || entry_ref->pending == NULL)
1307 return NULL;
1309 return entry_ref;
1312 void
1313 pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
1315 PgStat_Kind kind = entry_ref->shared_entry->key.kind;
1316 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1317 void *pending_data = entry_ref->pending;
1319 Assert(pending_data != NULL);
1320 /* !fixed_amount stats should be handled explicitly */
1321 Assert(!pgstat_get_kind_info(kind)->fixed_amount);
1323 if (kind_info->delete_pending_cb)
1324 kind_info->delete_pending_cb(entry_ref);
1326 pfree(pending_data);
1327 entry_ref->pending = NULL;
1329 dlist_delete(&entry_ref->pending_node);
1333 * Flush out pending stats for database objects (databases, relations,
1334 * functions).
1336 static bool
1337 pgstat_flush_pending_entries(bool nowait)
1339 bool have_pending = false;
1340 dlist_node *cur = NULL;
1343 * Need to be a bit careful iterating over the list of pending entries.
1344 * Processing a pending entry may queue further pending entries to the end
1345 * of the list that we want to process, so a simple iteration won't do.
1346 * Further complicating matters is that we want to delete the current
1347 * entry in each iteration from the list if we flushed successfully.
1349 * So we just keep track of the next pointer in each loop iteration.
1351 if (!dlist_is_empty(&pgStatPending))
1352 cur = dlist_head_node(&pgStatPending);
1354 while (cur)
1356 PgStat_EntryRef *entry_ref =
1357 dlist_container(PgStat_EntryRef, pending_node, cur);
1358 PgStat_HashKey key = entry_ref->shared_entry->key;
1359 PgStat_Kind kind = key.kind;
1360 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
1361 bool did_flush;
1362 dlist_node *next;
1364 Assert(!kind_info->fixed_amount);
1365 Assert(kind_info->flush_pending_cb != NULL);
1367 /* flush the stats, if possible */
1368 did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
1370 Assert(did_flush || nowait);
1372 /* determine next entry, before deleting the pending entry */
1373 if (dlist_has_next(&pgStatPending, cur))
1374 next = dlist_next_node(&pgStatPending, cur);
1375 else
1376 next = NULL;
1378 /* if successfully flushed, remove entry */
1379 if (did_flush)
1380 pgstat_delete_pending_entry(entry_ref);
1381 else
1382 have_pending = true;
1384 cur = next;
1387 Assert(dlist_is_empty(&pgStatPending) == !have_pending);
1389 return have_pending;
/* ------------------------------------------------------------
 * Helper / infrastructure functions
 * ------------------------------------------------------------
 */
1398 PgStat_Kind
1399 pgstat_get_kind_from_str(char *kind_str)
1401 for (PgStat_Kind kind = PGSTAT_KIND_BUILTIN_MIN; kind <= PGSTAT_KIND_BUILTIN_MAX; kind++)
1403 if (pg_strcasecmp(kind_str, pgstat_kind_builtin_infos[kind].name) == 0)
1404 return kind;
1407 /* Check the custom set of cumulative stats */
1408 if (pgstat_kind_custom_infos)
1410 for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
1412 uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1414 if (pgstat_kind_custom_infos[idx] &&
1415 pg_strcasecmp(kind_str, pgstat_kind_custom_infos[idx]->name) == 0)
1416 return kind;
1420 ereport(ERROR,
1421 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1422 errmsg("invalid statistics kind: \"%s\"", kind_str)));
1423 return PGSTAT_KIND_INVALID; /* avoid compiler warnings */
1426 static inline bool
1427 pgstat_is_kind_valid(PgStat_Kind kind)
1429 return pgstat_is_kind_builtin(kind) || pgstat_is_kind_custom(kind);
1432 const PgStat_KindInfo *
1433 pgstat_get_kind_info(PgStat_Kind kind)
1435 if (pgstat_is_kind_builtin(kind))
1436 return &pgstat_kind_builtin_infos[kind];
1438 if (pgstat_is_kind_custom(kind))
1440 uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1442 if (pgstat_kind_custom_infos == NULL ||
1443 pgstat_kind_custom_infos[idx] == NULL)
1444 return NULL;
1445 return pgstat_kind_custom_infos[idx];
1448 return NULL;
1452 * Register a new stats kind.
1454 * PgStat_Kinds must be globally unique across all extensions. Refer
1455 * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
1456 * unique ID for your extension, to avoid conflicts with other extension
1457 * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
1458 * needlessly reserving a new ID.
1460 void
1461 pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
1463 uint32 idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1465 if (kind_info->name == NULL || strlen(kind_info->name) == 0)
1466 ereport(ERROR,
1467 (errmsg("custom cumulative statistics name is invalid"),
1468 errhint("Provide a non-empty name for the custom cumulative statistics.")));
1470 if (!pgstat_is_kind_custom(kind))
1471 ereport(ERROR, (errmsg("custom cumulative statistics ID %u is out of range", kind),
1472 errhint("Provide a custom cumulative statistics ID between %u and %u.",
1473 PGSTAT_KIND_CUSTOM_MIN, PGSTAT_KIND_CUSTOM_MAX)));
1475 if (!process_shared_preload_libraries_in_progress)
1476 ereport(ERROR,
1477 (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1478 errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
1481 * Check some data for fixed-numbered stats.
1483 if (kind_info->fixed_amount)
1485 if (kind_info->shared_size == 0)
1486 ereport(ERROR,
1487 (errmsg("custom cumulative statistics property is invalid"),
1488 errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
1492 * If pgstat_kind_custom_infos is not available yet, allocate it.
1494 if (pgstat_kind_custom_infos == NULL)
1496 pgstat_kind_custom_infos = (const PgStat_KindInfo **)
1497 MemoryContextAllocZero(TopMemoryContext,
1498 sizeof(PgStat_KindInfo *) * PGSTAT_KIND_CUSTOM_SIZE);
1501 if (pgstat_kind_custom_infos[idx] != NULL &&
1502 pgstat_kind_custom_infos[idx]->name != NULL)
1503 ereport(ERROR,
1504 (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1505 errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
1506 pgstat_kind_custom_infos[idx]->name)));
1508 /* check for existing custom stats with the same name */
1509 for (PgStat_Kind existing_kind = PGSTAT_KIND_CUSTOM_MIN; existing_kind <= PGSTAT_KIND_CUSTOM_MAX; existing_kind++)
1511 uint32 existing_idx = existing_kind - PGSTAT_KIND_CUSTOM_MIN;
1513 if (pgstat_kind_custom_infos[existing_idx] == NULL)
1514 continue;
1515 if (!pg_strcasecmp(pgstat_kind_custom_infos[existing_idx]->name, kind_info->name))
1516 ereport(ERROR,
1517 (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info->name, kind),
1518 errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
1521 /* Register it */
1522 pgstat_kind_custom_infos[idx] = kind_info;
1523 ereport(LOG,
1524 (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
1525 kind_info->name, kind)));
/*
 * Assert that the stats system is usable right now, i.e. that we are past
 * pgstat_initialize() and not yet in pgstat_shutdown().  Stats must only be
 * reported in that window; calling this from a few central places makes
 * violations of the rule easier to catch.
 *
 * Only compiled in assert-enabled builds.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
/* ------------------------------------------------------------
 * reading and writing of on-disk stats file
 * ------------------------------------------------------------
 */
/* helpers for pgstat_write_statsfile() */

/*
 * Write 'len' bytes starting at 'ptr' to 'fpout' as one item.
 *
 * The result of fwrite() is deliberately ignored: the caller checks the
 * stream with ferror() once, after everything has been written.
 */
static void
write_chunk(FILE *fpout, void *ptr, size_t len)
{
	/* we'll check for errors with ferror once at the end */
	(void) fwrite(ptr, len, 1, fpout);
}

/* Write the object 'ptr' points to, sized by the pointed-to type. */
#define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
1562 * This function is called in the last process that is accessing the shared
1563 * stats so locking is not required.
1565 static void
1566 pgstat_write_statsfile(XLogRecPtr redo)
1568 FILE *fpout;
1569 int32 format_id;
1570 const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
1571 const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1572 dshash_seq_status hstat;
1573 PgStatShared_HashEntry *ps;
1575 pgstat_assert_is_up();
1577 /* should be called only by the checkpointer or single user mode */
1578 Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
1580 /* we're shutting down, so it's ok to just override this */
1581 pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
1583 elog(DEBUG2, "writing stats file \"%s\" with redo %X/%X", statfile,
1584 LSN_FORMAT_ARGS(redo));
1587 * Open the statistics temp file to write out the current values.
1589 fpout = AllocateFile(tmpfile, PG_BINARY_W);
1590 if (fpout == NULL)
1592 ereport(LOG,
1593 (errcode_for_file_access(),
1594 errmsg("could not open temporary statistics file \"%s\": %m",
1595 tmpfile)));
1596 return;
1600 * Write the file header --- currently just a format ID.
1602 format_id = PGSTAT_FILE_FORMAT_ID;
1603 write_chunk_s(fpout, &format_id);
1605 /* Write the redo LSN, used to cross check the file read */
1606 write_chunk_s(fpout, &redo);
1608 /* Write various stats structs for fixed number of objects */
1609 for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
1611 char *ptr;
1612 const PgStat_KindInfo *info = pgstat_get_kind_info(kind);
1614 if (!info || !info->fixed_amount)
1615 continue;
1617 if (pgstat_is_kind_builtin(kind))
1618 Assert(info->snapshot_ctl_off != 0);
1620 pgstat_build_snapshot_fixed(kind);
1621 if (pgstat_is_kind_builtin(kind))
1622 ptr = ((char *) &pgStatLocal.snapshot) + info->snapshot_ctl_off;
1623 else
1624 ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN];
1626 fputc(PGSTAT_FILE_ENTRY_FIXED, fpout);
1627 write_chunk_s(fpout, &kind);
1628 write_chunk(fpout, ptr, info->shared_data_len);
1632 * Walk through the stats entries
1634 dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
1635 while ((ps = dshash_seq_next(&hstat)) != NULL)
1637 PgStatShared_Common *shstats;
1638 const PgStat_KindInfo *kind_info = NULL;
1640 CHECK_FOR_INTERRUPTS();
1642 /* we may have some "dropped" entries not yet removed, skip them */
1643 Assert(!ps->dropped);
1644 if (ps->dropped)
1645 continue;
1648 * This discards data related to custom stats kinds that are unknown
1649 * to this process.
1651 if (!pgstat_is_kind_valid(ps->key.kind))
1653 elog(WARNING, "found unknown stats entry %u/%u/%llu",
1654 ps->key.kind, ps->key.dboid,
1655 (unsigned long long) ps->key.objid);
1656 continue;
1659 shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
1661 kind_info = pgstat_get_kind_info(ps->key.kind);
1663 /* if not dropped the valid-entry refcount should exist */
1664 Assert(pg_atomic_read_u32(&ps->refcount) > 0);
1666 if (!kind_info->to_serialized_name)
1668 /* normal stats entry, identified by PgStat_HashKey */
1669 fputc(PGSTAT_FILE_ENTRY_HASH, fpout);
1670 write_chunk_s(fpout, &ps->key);
1672 else
1674 /* stats entry identified by name on disk (e.g. slots) */
1675 NameData name;
1677 kind_info->to_serialized_name(&ps->key, shstats, &name);
1679 fputc(PGSTAT_FILE_ENTRY_NAME, fpout);
1680 write_chunk_s(fpout, &ps->key.kind);
1681 write_chunk_s(fpout, &name);
1684 /* Write except the header part of the entry */
1685 write_chunk(fpout,
1686 pgstat_get_entry_data(ps->key.kind, shstats),
1687 pgstat_get_entry_len(ps->key.kind));
1689 dshash_seq_term(&hstat);
1692 * No more output to be done. Close the temp file and replace the old
1693 * pgstat.stat with it. The ferror() check replaces testing for error
1694 * after each individual fputc or fwrite (in write_chunk()) above.
1696 fputc(PGSTAT_FILE_ENTRY_END, fpout);
1698 if (ferror(fpout))
1700 ereport(LOG,
1701 (errcode_for_file_access(),
1702 errmsg("could not write temporary statistics file \"%s\": %m",
1703 tmpfile)));
1704 FreeFile(fpout);
1705 unlink(tmpfile);
1707 else if (FreeFile(fpout) < 0)
1709 ereport(LOG,
1710 (errcode_for_file_access(),
1711 errmsg("could not close temporary statistics file \"%s\": %m",
1712 tmpfile)));
1713 unlink(tmpfile);
1715 else if (durable_rename(tmpfile, statfile, LOG) < 0)
1717 /* durable_rename already emitted log message */
1718 unlink(tmpfile);
/* helpers for pgstat_read_statsfile() */

/*
 * Read exactly 'len' bytes from 'fpin' into 'ptr'.
 *
 * Returns true if all requested bytes were read, false otherwise.
 */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
	size_t		nread = fread(ptr, 1, len, fpin);

	return nread == len;
}

/* Read into the object 'ptr' points to, sized by the pointed-to type. */
#define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr))
1732 * Reads in existing statistics file into memory.
1734 * This function is called in the only process that is accessing the shared
1735 * stats so locking is not required.
1737 static void
1738 pgstat_read_statsfile(XLogRecPtr redo)
1740 FILE *fpin;
1741 int32 format_id;
1742 bool found;
1743 const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
1744 PgStat_ShmemControl *shmem = pgStatLocal.shmem;
1745 XLogRecPtr file_redo;
1747 /* shouldn't be called from postmaster */
1748 Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
1750 elog(DEBUG2, "reading stats file \"%s\" with redo %X/%X", statfile,
1751 LSN_FORMAT_ARGS(redo));
1754 * Try to open the stats file. If it doesn't exist, the backends simply
1755 * returns zero for anything and statistics simply starts from scratch
1756 * with empty counters.
1758 * ENOENT is a possibility if stats collection was previously disabled or
1759 * has not yet written the stats file for the first time. Any other
1760 * failure condition is suspicious.
1762 if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
1764 if (errno != ENOENT)
1765 ereport(LOG,
1766 (errcode_for_file_access(),
1767 errmsg("could not open statistics file \"%s\": %m",
1768 statfile)));
1769 pgstat_reset_after_failure();
1770 return;
1774 * Verify it's of the expected format.
1776 if (!read_chunk_s(fpin, &format_id))
1778 elog(WARNING, "could not read format ID");
1779 goto error;
1782 if (format_id != PGSTAT_FILE_FORMAT_ID)
1784 elog(WARNING, "found incorrect format ID %d (expected %d)",
1785 format_id, PGSTAT_FILE_FORMAT_ID);
1786 goto error;
1790 * Read the redo LSN stored in the file.
1792 if (!read_chunk_s(fpin, &file_redo))
1794 elog(WARNING, "could not read redo LSN");
1795 goto error;
1798 if (file_redo != redo)
1800 elog(WARNING, "found incorrect redo LSN %X/%X (expected %X/%X)",
1801 LSN_FORMAT_ARGS(file_redo), LSN_FORMAT_ARGS(redo));
1802 goto error;
1806 * We found an existing statistics file. Read it and put all the stats
1807 * data into place.
1809 for (;;)
1811 int t = fgetc(fpin);
1813 switch (t)
1815 case PGSTAT_FILE_ENTRY_FIXED:
1817 PgStat_Kind kind;
1818 const PgStat_KindInfo *info;
1819 char *ptr;
1821 /* entry for fixed-numbered stats */
1822 if (!read_chunk_s(fpin, &kind))
1824 elog(WARNING, "could not read stats kind for entry of type %c", t);
1825 goto error;
1828 if (!pgstat_is_kind_valid(kind))
1830 elog(WARNING, "invalid stats kind %u for entry of type %c",
1831 kind, t);
1832 goto error;
1835 info = pgstat_get_kind_info(kind);
1836 if (!info)
1838 elog(WARNING, "could not find information of kind %u for entry of type %c",
1839 kind, t);
1840 goto error;
1843 if (!info->fixed_amount)
1845 elog(WARNING, "invalid fixed_amount in stats kind %u for entry of type %c",
1846 kind, t);
1847 goto error;
1850 /* Load back stats into shared memory */
1851 if (pgstat_is_kind_builtin(kind))
1852 ptr = ((char *) shmem) + info->shared_ctl_off +
1853 info->shared_data_off;
1854 else
1856 int idx = kind - PGSTAT_KIND_CUSTOM_MIN;
1858 ptr = ((char *) shmem->custom_data[idx]) +
1859 info->shared_data_off;
1862 if (!read_chunk(fpin, ptr, info->shared_data_len))
1864 elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u",
1865 kind, t, info->shared_data_len);
1866 goto error;
1869 break;
1871 case PGSTAT_FILE_ENTRY_HASH:
1872 case PGSTAT_FILE_ENTRY_NAME:
1874 PgStat_HashKey key;
1875 PgStatShared_HashEntry *p;
1876 PgStatShared_Common *header;
1878 CHECK_FOR_INTERRUPTS();
1880 if (t == PGSTAT_FILE_ENTRY_HASH)
1882 /* normal stats entry, identified by PgStat_HashKey */
1883 if (!read_chunk_s(fpin, &key))
1885 elog(WARNING, "could not read key for entry of type %c", t);
1886 goto error;
1889 if (!pgstat_is_kind_valid(key.kind))
1891 elog(WARNING, "invalid stats kind for entry %u/%u/%llu of type %c",
1892 key.kind, key.dboid,
1893 (unsigned long long) key.objid, t);
1894 goto error;
1897 else
1899 /* stats entry identified by name on disk (e.g. slots) */
1900 const PgStat_KindInfo *kind_info = NULL;
1901 PgStat_Kind kind;
1902 NameData name;
1904 if (!read_chunk_s(fpin, &kind))
1906 elog(WARNING, "could not read stats kind for entry of type %c", t);
1907 goto error;
1909 if (!read_chunk_s(fpin, &name))
1911 elog(WARNING, "could not read name of stats kind %u for entry of type %c",
1912 kind, t);
1913 goto error;
1915 if (!pgstat_is_kind_valid(kind))
1917 elog(WARNING, "invalid stats kind %u for entry of type %c",
1918 kind, t);
1919 goto error;
1922 kind_info = pgstat_get_kind_info(kind);
1923 if (!kind_info)
1925 elog(WARNING, "could not find information of kind %u for entry of type %c",
1926 kind, t);
1927 goto error;
1930 if (!kind_info->from_serialized_name)
1932 elog(WARNING, "invalid from_serialized_name in stats kind %u for entry of type %c",
1933 kind, t);
1934 goto error;
1937 if (!kind_info->from_serialized_name(&name, &key))
1939 /* skip over data for entry we don't care about */
1940 if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
1942 elog(WARNING, "could not seek \"%s\" of stats kind %u for entry of type %c",
1943 NameStr(name), kind, t);
1944 goto error;
1947 continue;
1950 Assert(key.kind == kind);
1954 * This intentionally doesn't use pgstat_get_entry_ref() -
1955 * putting all stats into checkpointer's
1956 * pgStatEntryRefHash would be wasted effort and memory.
1958 p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
1960 /* don't allow duplicate entries */
1961 if (found)
1963 dshash_release_lock(pgStatLocal.shared_hash, p);
1964 elog(WARNING, "found duplicate stats entry %u/%u/%llu of type %c",
1965 key.kind, key.dboid,
1966 (unsigned long long) key.objid, t);
1967 goto error;
1970 header = pgstat_init_entry(key.kind, p);
1971 dshash_release_lock(pgStatLocal.shared_hash, p);
1973 if (!read_chunk(fpin,
1974 pgstat_get_entry_data(key.kind, header),
1975 pgstat_get_entry_len(key.kind)))
1977 elog(WARNING, "could not read data for entry %u/%u/%llu of type %c",
1978 key.kind, key.dboid,
1979 (unsigned long long) key.objid, t);
1980 goto error;
1983 break;
1985 case PGSTAT_FILE_ENTRY_END:
1988 * check that PGSTAT_FILE_ENTRY_END actually signals end of
1989 * file
1991 if (fgetc(fpin) != EOF)
1993 elog(WARNING, "could not read end-of-file");
1994 goto error;
1997 goto done;
1999 default:
2000 elog(WARNING, "could not read entry of type %c", t);
2001 goto error;
2005 done:
2006 FreeFile(fpin);
2008 elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
2009 unlink(statfile);
2011 return;
2013 error:
2014 ereport(LOG,
2015 (errmsg("corrupted statistics file \"%s\"", statfile)));
2017 pgstat_reset_after_failure();
2019 goto done;
2023 * Helper to reset / drop stats after a crash or after restoring stats from
2024 * disk failed, potentially after already loading parts.
2026 static void
2027 pgstat_reset_after_failure(void)
2029 TimestampTz ts = GetCurrentTimestamp();
2031 /* reset fixed-numbered stats */
2032 for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
2034 const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
2036 if (!kind_info || !kind_info->fixed_amount)
2037 continue;
2039 kind_info->reset_all_cb(ts);
2042 /* and drop variable-numbered ones */
2043 pgstat_drop_all_entries();
2047 * GUC assign_hook for stats_fetch_consistency.
2049 void
2050 assign_stats_fetch_consistency(int newval, void *extra)
2053 * Changing this value in a transaction may cause snapshot state
2054 * inconsistencies, so force a clear of the current snapshot on the next
2055 * snapshot build attempt.
2057 if (pgstat_fetch_consistency != newval)
2058 force_stats_snapshot_clear = true;