3 * Infrastructure for the cumulative statistics system.
5 * The cumulative statistics system accumulates statistics for different kinds
6 * of objects. Some kinds of statistics are collected for a fixed number of
7 * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
8 * statistics are collected for a varying number of objects
9 * (e.g. relations). See PgStat_KindInfo for a list of currently handled statistics.
12 * Statistics are loaded from the filesystem during startup (by the startup
13 * process), unless preceded by a crash, in which case all stats are
14 * discarded. They are written out by the checkpointer process just before
15 * shutting down, except when shutting down in immediate mode.
17 * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
19 * Statistics for variable-numbered objects are stored in dynamic shared
20 * memory and can be found via a dshash hashtable. The statistics counters are
21 * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
22 * separately allocated (PgStatShared_HashEntry->body). The separate
23 * allocation allows different kinds of statistics to be stored in the same
24 * hashtable without wasting space in PgStatShared_HashEntry.
26 * Variable-numbered stats are addressed by PgStat_HashKey while running. It
27 * is not possible to have statistics for an object that cannot be addressed
28 * that way at runtime. A wider identifier can be used when serializing to
29 * disk (used for replication slot stats).
31 * To avoid contention on the shared hashtable, each backend has a
32 * backend-local hashtable (pgStatEntryRefHash) in front of the shared
33 * hashtable, containing references (PgStat_EntryRef) to shared hashtable
34 * entries. The shared hashtable only needs to be accessed when no prior
35 * reference is found in the local hashtable. Besides pointing to the
36 * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also
37 * contains a pointer to the shared statistics data, as a process-local
38 * address, to reduce access costs.
40 * The names for structs stored in shared memory are prefixed with
41 * PgStatShared instead of PgStat. Each stats entry in shared memory is
42 * protected by a dedicated lwlock.
44 * Most stats updates are first accumulated locally in each process as pending
45 * entries, then later flushed to shared memory (just after commit, or by
46 * idle-timeout). This practically eliminates contention on individual stats
47 * entries. For most kinds of variable-numbered stats, pending data is stored
48 * in PgStat_EntryRef->pending. All entries with pending data are in the
49 * pgStatPending list. Pending statistics updates are flushed out by
50 * pgstat_report_stat().
52 * It is possible for external modules to define custom statistics kinds,
53 * that can use the same properties as any built-in stats kinds. Each custom
54 * stats kind needs to assign a unique ID to ensure that it does not overlap
55 * with other extensions. In order to reserve a unique stats kind ID, refer
56 * to https://wiki.postgresql.org/wiki/CustomCumulativeStats.
58 * The behavior of different kinds of statistics is determined by the kind's
59 * entry in pgstat_kind_builtin_infos for all the built-in statistics kinds
60 * defined, and pgstat_kind_custom_infos for custom kinds registered at
61 * startup by pgstat_register_kind(). See PgStat_KindInfo for details.
63 * The consistency of read accesses to statistics can be configured using the
64 * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
65 * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
66 * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT, statistics are stored in
67 * pgStatLocal.snapshot.
69 * To keep things manageable, stats handling is split across several
70 * files. Infrastructure pieces are in:
71 * - pgstat.c - this file, to tie it all together
72 * - pgstat_shmem.c - nearly everything dealing with shared memory, including
73 * the maintenance of hashtable entries
74 * - pgstat_xact.c - transactional integration, including the transactional
75 * creation and dropping of stats entries
77 * Each statistics kind is handled in a dedicated file:
80 * - pgstat_checkpointer.c
87 * - pgstat_subscription.c
90 * Whenever possible infrastructure files should not contain code related to
91 * specific kinds of stats.
94 * Copyright (c) 2001-2024, PostgreSQL Global Development Group
97 * src/backend/utils/activity/pgstat.c
100 #include "postgres.h"
104 #include "access/xact.h"
105 #include "access/xlog.h"
106 #include "lib/dshash.h"
108 #include "storage/fd.h"
109 #include "storage/ipc.h"
110 #include "storage/lwlock.h"
111 #include "utils/guc_hooks.h"
112 #include "utils/memutils.h"
113 #include "utils/pgstat_internal.h"
114 #include "utils/timestamp.h"
124 /* minimum interval between non-forced stats flushes */
125 #define PGSTAT_MIN_INTERVAL 1000
126 /* how long stats updates may stay pending before a flush blocks on locks */
127 #define PGSTAT_MAX_INTERVAL 60000
128 /* when to call pgstat_report_stat() again, even when idle */
129 #define PGSTAT_IDLE_INTERVAL 10000
132 * Initial size hints for the hash tables used in statistics.
136 #define PGSTAT_SNAPSHOT_HASH_SIZE 512
139 * Identifiers in stats file.
142 #define PGSTAT_FILE_ENTRY_END 'E' /* end of file */
143 #define PGSTAT_FILE_ENTRY_FIXED 'F' /* fixed-numbered stats entry */
144 #define PGSTAT_FILE_ENTRY_NAME 'N' /* stats entry identified by name */
145 #define PGSTAT_FILE_ENTRY_HASH 'S' /* stats entry identified by
148 /* hash table entry for statistics snapshots */
149 typedef struct PgStat_SnapshotEntry
152 char status
; /* for simplehash use */
153 void *data
; /* the stats data itself */
154 } PgStat_SnapshotEntry
;
158 * Backend-local Hash Table Definitions
162 /* for stats snapshot entries */
163 #define SH_PREFIX pgstat_snapshot
164 #define SH_ELEMENT_TYPE PgStat_SnapshotEntry
165 #define SH_KEY_TYPE PgStat_HashKey
167 #define SH_HASH_KEY(tb, key) \
168 pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
169 #define SH_EQUAL(tb, a, b) \
170 pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
171 #define SH_SCOPE static inline
174 #include "lib/simplehash.h"
178 * Local function forward declarations
182 static void pgstat_write_statsfile(XLogRecPtr redo
);
183 static void pgstat_read_statsfile(XLogRecPtr redo
);
185 static void pgstat_init_snapshot_fixed(void);
187 static void pgstat_reset_after_failure(void);
189 static bool pgstat_flush_pending_entries(bool nowait
);
191 static void pgstat_prep_snapshot(void);
192 static void pgstat_build_snapshot(void);
193 static void pgstat_build_snapshot_fixed(PgStat_Kind kind
);
195 static inline bool pgstat_is_kind_valid(PgStat_Kind kind
);
203 bool pgstat_track_counts
= false;
204 int pgstat_fetch_consistency
= PGSTAT_FETCH_CONSISTENCY_CACHE
;
208 * state shared with pgstat_*.c
212 PgStat_LocalState pgStatLocal
;
218 * NB: There should be only variables related to stats infrastructure here,
219 * not for specific kinds of stats.
224 * Memory contexts containing the pgStatEntryRefHash table, the
225 * pgStatSharedRef entries, and pending data respectively. Mostly to make it
226 * easier to track / attribute memory usage.
229 static MemoryContext pgStatPendingContext
= NULL
;
232 * Backend local list of PgStat_EntryRef with unflushed pending stats.
234 * Newly pending entries should only ever be added to the end of the list,
235 * otherwise pgstat_flush_pending_entries() might not see them immediately.
237 static dlist_head pgStatPending
= DLIST_STATIC_INIT(pgStatPending
);
241 * Force the next stats flush to happen regardless of
242 * PGSTAT_MIN_INTERVAL. Useful in test scripts.
244 static bool pgStatForceNextFlush
= false;
247 * Force-clear existing snapshot before next use when stats_fetch_consistency
250 static bool force_stats_snapshot_clear
= false;
254 * For assertions that check pgstat is not used before initialization / after
257 #ifdef USE_ASSERT_CHECKING
258 static bool pgstat_is_initialized
= false;
259 static bool pgstat_is_shutdown
= false;
264 * The different kinds of built-in statistics.
266 * If reasonably possible, handling specific to one kind of stats should go
267 * through this abstraction, rather than making more of pgstat.c aware.
269 * See comments for struct PgStat_KindInfo for details about the individual
272 * XXX: It'd be nicer to define this outside of this file. But there doesn't
273 * seem to be a great way of doing that, given the split across multiple
276 static const PgStat_KindInfo pgstat_kind_builtin_infos
[PGSTAT_KIND_BUILTIN_SIZE
] = {
278 /* stats kinds for variable-numbered objects */
280 [PGSTAT_KIND_DATABASE
] = {
283 .fixed_amount
= false,
284 /* so pg_stat_database entries can be seen in all databases */
285 .accessed_across_databases
= true,
287 .shared_size
= sizeof(PgStatShared_Database
),
288 .shared_data_off
= offsetof(PgStatShared_Database
, stats
),
289 .shared_data_len
= sizeof(((PgStatShared_Database
*) 0)->stats
),
290 .pending_size
= sizeof(PgStat_StatDBEntry
),
292 .flush_pending_cb
= pgstat_database_flush_cb
,
293 .reset_timestamp_cb
= pgstat_database_reset_timestamp_cb
,
296 [PGSTAT_KIND_RELATION
] = {
299 .fixed_amount
= false,
301 .shared_size
= sizeof(PgStatShared_Relation
),
302 .shared_data_off
= offsetof(PgStatShared_Relation
, stats
),
303 .shared_data_len
= sizeof(((PgStatShared_Relation
*) 0)->stats
),
304 .pending_size
= sizeof(PgStat_TableStatus
),
306 .flush_pending_cb
= pgstat_relation_flush_cb
,
307 .delete_pending_cb
= pgstat_relation_delete_pending_cb
,
310 [PGSTAT_KIND_FUNCTION
] = {
313 .fixed_amount
= false,
315 .shared_size
= sizeof(PgStatShared_Function
),
316 .shared_data_off
= offsetof(PgStatShared_Function
, stats
),
317 .shared_data_len
= sizeof(((PgStatShared_Function
*) 0)->stats
),
318 .pending_size
= sizeof(PgStat_FunctionCounts
),
320 .flush_pending_cb
= pgstat_function_flush_cb
,
323 [PGSTAT_KIND_REPLSLOT
] = {
326 .fixed_amount
= false,
328 .accessed_across_databases
= true,
330 .shared_size
= sizeof(PgStatShared_ReplSlot
),
331 .shared_data_off
= offsetof(PgStatShared_ReplSlot
, stats
),
332 .shared_data_len
= sizeof(((PgStatShared_ReplSlot
*) 0)->stats
),
334 .reset_timestamp_cb
= pgstat_replslot_reset_timestamp_cb
,
335 .to_serialized_name
= pgstat_replslot_to_serialized_name_cb
,
336 .from_serialized_name
= pgstat_replslot_from_serialized_name_cb
,
339 [PGSTAT_KIND_SUBSCRIPTION
] = {
340 .name
= "subscription",
342 .fixed_amount
= false,
343 /* so pg_stat_subscription_stats entries can be seen in all databases */
344 .accessed_across_databases
= true,
346 .shared_size
= sizeof(PgStatShared_Subscription
),
347 .shared_data_off
= offsetof(PgStatShared_Subscription
, stats
),
348 .shared_data_len
= sizeof(((PgStatShared_Subscription
*) 0)->stats
),
349 .pending_size
= sizeof(PgStat_BackendSubEntry
),
351 .flush_pending_cb
= pgstat_subscription_flush_cb
,
352 .reset_timestamp_cb
= pgstat_subscription_reset_timestamp_cb
,
356 /* stats for fixed-numbered (mostly 1) objects */
358 [PGSTAT_KIND_ARCHIVER
] = {
361 .fixed_amount
= true,
363 .snapshot_ctl_off
= offsetof(PgStat_Snapshot
, archiver
),
364 .shared_ctl_off
= offsetof(PgStat_ShmemControl
, archiver
),
365 .shared_data_off
= offsetof(PgStatShared_Archiver
, stats
),
366 .shared_data_len
= sizeof(((PgStatShared_Archiver
*) 0)->stats
),
368 .init_shmem_cb
= pgstat_archiver_init_shmem_cb
,
369 .reset_all_cb
= pgstat_archiver_reset_all_cb
,
370 .snapshot_cb
= pgstat_archiver_snapshot_cb
,
373 [PGSTAT_KIND_BGWRITER
] = {
376 .fixed_amount
= true,
378 .snapshot_ctl_off
= offsetof(PgStat_Snapshot
, bgwriter
),
379 .shared_ctl_off
= offsetof(PgStat_ShmemControl
, bgwriter
),
380 .shared_data_off
= offsetof(PgStatShared_BgWriter
, stats
),
381 .shared_data_len
= sizeof(((PgStatShared_BgWriter
*) 0)->stats
),
383 .init_shmem_cb
= pgstat_bgwriter_init_shmem_cb
,
384 .reset_all_cb
= pgstat_bgwriter_reset_all_cb
,
385 .snapshot_cb
= pgstat_bgwriter_snapshot_cb
,
388 [PGSTAT_KIND_CHECKPOINTER
] = {
389 .name
= "checkpointer",
391 .fixed_amount
= true,
393 .snapshot_ctl_off
= offsetof(PgStat_Snapshot
, checkpointer
),
394 .shared_ctl_off
= offsetof(PgStat_ShmemControl
, checkpointer
),
395 .shared_data_off
= offsetof(PgStatShared_Checkpointer
, stats
),
396 .shared_data_len
= sizeof(((PgStatShared_Checkpointer
*) 0)->stats
),
398 .init_shmem_cb
= pgstat_checkpointer_init_shmem_cb
,
399 .reset_all_cb
= pgstat_checkpointer_reset_all_cb
,
400 .snapshot_cb
= pgstat_checkpointer_snapshot_cb
,
406 .fixed_amount
= true,
408 .snapshot_ctl_off
= offsetof(PgStat_Snapshot
, io
),
409 .shared_ctl_off
= offsetof(PgStat_ShmemControl
, io
),
410 .shared_data_off
= offsetof(PgStatShared_IO
, stats
),
411 .shared_data_len
= sizeof(((PgStatShared_IO
*) 0)->stats
),
413 .flush_fixed_cb
= pgstat_io_flush_cb
,
414 .have_fixed_pending_cb
= pgstat_io_have_pending_cb
,
415 .init_shmem_cb
= pgstat_io_init_shmem_cb
,
416 .reset_all_cb
= pgstat_io_reset_all_cb
,
417 .snapshot_cb
= pgstat_io_snapshot_cb
,
420 [PGSTAT_KIND_SLRU
] = {
423 .fixed_amount
= true,
425 .snapshot_ctl_off
= offsetof(PgStat_Snapshot
, slru
),
426 .shared_ctl_off
= offsetof(PgStat_ShmemControl
, slru
),
427 .shared_data_off
= offsetof(PgStatShared_SLRU
, stats
),
428 .shared_data_len
= sizeof(((PgStatShared_SLRU
*) 0)->stats
),
430 .flush_fixed_cb
= pgstat_slru_flush_cb
,
431 .have_fixed_pending_cb
= pgstat_slru_have_pending_cb
,
432 .init_shmem_cb
= pgstat_slru_init_shmem_cb
,
433 .reset_all_cb
= pgstat_slru_reset_all_cb
,
434 .snapshot_cb
= pgstat_slru_snapshot_cb
,
437 [PGSTAT_KIND_WAL
] = {
440 .fixed_amount
= true,
442 .snapshot_ctl_off
= offsetof(PgStat_Snapshot
, wal
),
443 .shared_ctl_off
= offsetof(PgStat_ShmemControl
, wal
),
444 .shared_data_off
= offsetof(PgStatShared_Wal
, stats
),
445 .shared_data_len
= sizeof(((PgStatShared_Wal
*) 0)->stats
),
447 .init_backend_cb
= pgstat_wal_init_backend_cb
,
448 .flush_fixed_cb
= pgstat_wal_flush_cb
,
449 .have_fixed_pending_cb
= pgstat_wal_have_pending_cb
,
450 .init_shmem_cb
= pgstat_wal_init_shmem_cb
,
451 .reset_all_cb
= pgstat_wal_reset_all_cb
,
452 .snapshot_cb
= pgstat_wal_snapshot_cb
,
457 * Information about custom statistics kinds.
459 * These are kept in a separate array from the built-in kinds to keep the
460 * initializations clear.
462 * Indexed by PGSTAT_KIND_CUSTOM_MIN, of size PGSTAT_KIND_CUSTOM_SIZE.
464 static const PgStat_KindInfo
**pgstat_kind_custom_infos
= NULL
;
466 /* ------------------------------------------------------------
467 * Functions managing the state of the stats system for all backends.
468 * ------------------------------------------------------------
472 * Read on-disk stats into memory at server start.
474 * Should only be called by the startup process or in single user mode.
477 pgstat_restore_stats(XLogRecPtr redo
)
479 pgstat_read_statsfile(redo
);
483 * Remove the stats file. This is currently used only if WAL recovery is
484 * needed after a crash.
486 * Should only be called by the startup process or in single user mode.
489 pgstat_discard_stats(void)
493 /* NB: this needs to be done even in single user mode */
495 ret
= unlink(PGSTAT_STAT_PERMANENT_FILENAME
);
500 "didn't need to unlink permanent stats file \"%s\" - didn't exist",
501 PGSTAT_STAT_PERMANENT_FILENAME
);
504 (errcode_for_file_access(),
505 errmsg("could not unlink permanent statistics file \"%s\": %m",
506 PGSTAT_STAT_PERMANENT_FILENAME
)));
511 (errcode_for_file_access(),
512 errmsg_internal("unlinked permanent statistics file \"%s\"",
513 PGSTAT_STAT_PERMANENT_FILENAME
)));
517 * Reset stats contents. This will set reset timestamps of fixed-numbered
518 * stats to the current time (no variable stats exist).
520 pgstat_reset_after_failure();
524 * pgstat_before_server_shutdown() needs to be called by exactly one process
525 * during regular server shutdowns. Otherwise all stats will be lost.
527 * We currently only write out stats for proc_exit(0). We might want to change
528 * that at some point... But right now pgstat_discard_stats() would be called
529 * during the start after a disorderly shutdown, anyway.
532 pgstat_before_server_shutdown(int code
, Datum arg
)
534 Assert(pgStatLocal
.shmem
!= NULL
);
535 Assert(!pgStatLocal
.shmem
->is_shutdown
);
538 * Stats should only be reported after pgstat_initialize() and before
539 * pgstat_shutdown(). This is a convenient point to catch most violations
542 Assert(pgstat_is_initialized
&& !pgstat_is_shutdown
);
544 /* flush out our own pending changes before writing out */
545 pgstat_report_stat(true);
548 * Only write out file during normal shutdown. Don't even signal that
549 * we've shutdown during irregular shutdowns, because the shutdown
550 * sequence isn't coordinated to ensure this backend shuts down last.
554 pgStatLocal
.shmem
->is_shutdown
= true;
555 pgstat_write_statsfile(GetRedoRecPtr());
560 /* ------------------------------------------------------------
561 * Backend initialization / shutdown functions
562 * ------------------------------------------------------------
566 * Shut down a single backend's statistics reporting at process exit.
568 * Flush out any remaining statistics counts. Without this, operations
569 * triggered during backend exit (such as temp table deletions) won't be
573 pgstat_shutdown_hook(int code
, Datum arg
)
575 Assert(!pgstat_is_shutdown
);
576 Assert(IsUnderPostmaster
|| !IsPostmasterEnvironment
);
579 * If we got as far as discovering our own database ID, we can flush out
580 * what we did so far. Otherwise, we'd be reporting an invalid database
581 * ID, so forget it. (This means that accesses to pg_database during
582 * failed backend starts might never get counted.)
584 if (OidIsValid(MyDatabaseId
))
585 pgstat_report_disconnect(MyDatabaseId
);
587 pgstat_report_stat(true);
589 /* there shouldn't be any pending changes left */
590 Assert(dlist_is_empty(&pgStatPending
));
591 dlist_init(&pgStatPending
);
593 pgstat_detach_shmem();
595 #ifdef USE_ASSERT_CHECKING
596 pgstat_is_shutdown
= true;
601 * Initialize pgstats state, and set up our on-proc-exit hook. Called from
604 * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
607 pgstat_initialize(void)
609 Assert(!pgstat_is_initialized
);
611 pgstat_attach_shmem();
613 pgstat_init_snapshot_fixed();
615 /* Backend initialization callbacks */
616 for (PgStat_Kind kind
= PGSTAT_KIND_MIN
; kind
<= PGSTAT_KIND_MAX
; kind
++)
618 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
620 if (kind_info
== NULL
|| kind_info
->init_backend_cb
== NULL
)
623 kind_info
->init_backend_cb();
626 /* Set up a process-exit hook to clean up */
627 before_shmem_exit(pgstat_shutdown_hook
, 0);
629 #ifdef USE_ASSERT_CHECKING
630 pgstat_is_initialized
= true;
635 /* ------------------------------------------------------------
636 * Public functions used by backends follow
637 * ------------------------------------------------------------
641 * Must be called by processes that perform DML: tcop/postgres.c, logical
642 * receiver processes, SPI worker, etc. to flush pending statistics updates to
645 * Unless called with 'force', pending stats updates are flushed at most once
646 * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
647 * block on lock acquisition, except if stats updates have been pending for
648 * longer than PGSTAT_MAX_INTERVAL (60000ms).
650 * Whenever pending stats updates remain at the end of pgstat_report_stat() a
651 * suggested idle timeout is returned. Currently this is always
652 * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
653 * a timeout after which to call pgstat_report_stat(true), but are not
656 * Note that this is called only when not within a transaction, so it is fair
657 * to use transaction stop time as an approximation of current time.
660 pgstat_report_stat(bool force
)
662 static TimestampTz pending_since
= 0;
663 static TimestampTz last_flush
= 0;
668 pgstat_assert_is_up();
669 Assert(!IsTransactionOrTransactionBlock());
671 /* "absorb" the forced flush even if there's nothing to flush */
672 if (pgStatForceNextFlush
)
675 pgStatForceNextFlush
= false;
678 /* Don't expend a clock check if nothing to do */
679 if (dlist_is_empty(&pgStatPending
))
681 bool do_flush
= false;
683 /* Check for pending fixed-numbered stats */
684 for (PgStat_Kind kind
= PGSTAT_KIND_MIN
; kind
<= PGSTAT_KIND_MAX
; kind
++)
686 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
690 if (!kind_info
->fixed_amount
)
692 Assert(kind_info
->have_fixed_pending_cb
== NULL
);
695 if (!kind_info
->have_fixed_pending_cb
)
698 if (kind_info
->have_fixed_pending_cb())
707 Assert(pending_since
== 0);
713 * There should never be stats to report once stats are shut down. Can't
714 * assert that before the checks above, as there is an unconditional
715 * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
716 * the process that ran pgstat_before_server_shutdown() will still call.
718 Assert(!pgStatLocal
.shmem
->is_shutdown
);
723 * Stats reports are forced either when it's been too long since stats
724 * have been reported or in processes that force stats reporting to
725 * happen at specific points (including shutdown). In the former case
726 * the transaction stop time might be quite old, in the latter it
727 * would never get cleared.
729 now
= GetCurrentTimestamp();
733 now
= GetCurrentTransactionStopTimestamp();
735 if (pending_since
> 0 &&
736 TimestampDifferenceExceeds(pending_since
, now
, PGSTAT_MAX_INTERVAL
))
738 /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
741 else if (last_flush
> 0 &&
742 !TimestampDifferenceExceeds(last_flush
, now
, PGSTAT_MIN_INTERVAL
))
744 /* don't flush too frequently */
745 if (pending_since
== 0)
748 return PGSTAT_IDLE_INTERVAL
;
752 pgstat_update_dbstats(now
);
754 /* don't wait for lock acquisition when !force */
757 partial_flush
= false;
759 /* flush database / relation / function / ... stats */
760 partial_flush
|= pgstat_flush_pending_entries(nowait
);
762 /* flush of fixed-numbered stats */
763 for (PgStat_Kind kind
= PGSTAT_KIND_MIN
; kind
<= PGSTAT_KIND_MAX
; kind
++)
765 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
769 if (!kind_info
->fixed_amount
)
771 Assert(kind_info
->flush_fixed_cb
== NULL
);
774 if (!kind_info
->flush_fixed_cb
)
777 partial_flush
|= kind_info
->flush_fixed_cb(nowait
);
783 * If some of the pending stats could not be flushed due to lock
784 * contention, let the caller know when to retry.
788 /* force should have prevented us from getting here */
791 /* remember since when stats have been pending */
792 if (pending_since
== 0)
795 return PGSTAT_IDLE_INTERVAL
;
804 * Force locally pending stats to be flushed during the next
805 * pgstat_report_stat() call. This is useful for writing tests.
808 pgstat_force_next_flush(void)
810 pgStatForceNextFlush
= true;
814 * Only for use by pgstat_reset_counters()
817 match_db_entries(PgStatShared_HashEntry
*entry
, Datum match_data
)
819 return entry
->key
.dboid
== DatumGetObjectId(MyDatabaseId
);
823 * Reset counters for our database.
825 * Permission checking for this function is managed through the normal
829 pgstat_reset_counters(void)
831 TimestampTz ts
= GetCurrentTimestamp();
833 pgstat_reset_matching_entries(match_db_entries
,
834 ObjectIdGetDatum(MyDatabaseId
),
839 * Reset a single variable-numbered entry.
841 * If the stats kind is within a database, also reset the database's
842 * stat_reset_timestamp.
844 * Permission checking for this function is managed through the normal
848 pgstat_reset(PgStat_Kind kind
, Oid dboid
, uint64 objid
)
850 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
851 TimestampTz ts
= GetCurrentTimestamp();
853 /* not needed atm, and doesn't make sense with the current signature */
854 Assert(!pgstat_get_kind_info(kind
)->fixed_amount
);
856 /* reset the "single counter" */
857 pgstat_reset_entry(kind
, dboid
, objid
, ts
);
859 if (!kind_info
->accessed_across_databases
)
860 pgstat_reset_database_timestamp(dboid
, ts
);
864 * Reset stats for all entries of a kind.
866 * Permission checking for this function is managed through the normal
870 pgstat_reset_of_kind(PgStat_Kind kind
)
872 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
873 TimestampTz ts
= GetCurrentTimestamp();
875 if (kind_info
->fixed_amount
)
876 kind_info
->reset_all_cb(ts
);
878 pgstat_reset_entries_of_kind(kind
, ts
);
882 /* ------------------------------------------------------------
884 * ------------------------------------------------------------
888 * Discard any data collected in the current transaction. Any subsequent
889 * request will cause new snapshots to be read.
891 * This is also invoked during transaction commit or abort to discard
892 * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can
893 * cause this routine to be called.
896 pgstat_clear_snapshot(void)
898 pgstat_assert_is_up();
900 memset(&pgStatLocal
.snapshot
.fixed_valid
, 0,
901 sizeof(pgStatLocal
.snapshot
.fixed_valid
));
902 memset(&pgStatLocal
.snapshot
.custom_valid
, 0,
903 sizeof(pgStatLocal
.snapshot
.custom_valid
));
904 pgStatLocal
.snapshot
.stats
= NULL
;
905 pgStatLocal
.snapshot
.mode
= PGSTAT_FETCH_CONSISTENCY_NONE
;
907 /* Release memory, if any was allocated */
908 if (pgStatLocal
.snapshot
.context
)
910 MemoryContextDelete(pgStatLocal
.snapshot
.context
);
912 /* Reset variables */
913 pgStatLocal
.snapshot
.context
= NULL
;
917 * Historically the backend_status.c facilities lived in this file, and
918 * were reset with the same function. For now keep it that way, and
919 * forward the reset request.
921 pgstat_clear_backend_activity_snapshot();
923 /* Reset this flag, as it may be possible that a cleanup was forced. */
924 force_stats_snapshot_clear
= false;
928 pgstat_fetch_entry(PgStat_Kind kind
, Oid dboid
, uint64 objid
)
931 PgStat_EntryRef
*entry_ref
;
933 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
935 /* should be called from backends */
936 Assert(IsUnderPostmaster
|| !IsPostmasterEnvironment
);
937 Assert(!kind_info
->fixed_amount
);
939 pgstat_prep_snapshot();
942 memset(&key
, 0, sizeof(struct PgStat_HashKey
));
948 /* if we need to build a full snapshot, do so */
949 if (pgstat_fetch_consistency
== PGSTAT_FETCH_CONSISTENCY_SNAPSHOT
)
950 pgstat_build_snapshot();
952 /* if caching is desired, look up in cache */
953 if (pgstat_fetch_consistency
> PGSTAT_FETCH_CONSISTENCY_NONE
)
955 PgStat_SnapshotEntry
*entry
= NULL
;
957 entry
= pgstat_snapshot_lookup(pgStatLocal
.snapshot
.stats
, key
);
963 * If we built a full snapshot and the key is not in
964 * pgStatLocal.snapshot.stats, there are no matching stats.
966 if (pgstat_fetch_consistency
== PGSTAT_FETCH_CONSISTENCY_SNAPSHOT
)
970 pgStatLocal
.snapshot
.mode
= pgstat_fetch_consistency
;
972 entry_ref
= pgstat_get_entry_ref(kind
, dboid
, objid
, false, NULL
);
974 if (entry_ref
== NULL
|| entry_ref
->shared_entry
->dropped
)
976 /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
977 if (pgstat_fetch_consistency
== PGSTAT_FETCH_CONSISTENCY_CACHE
)
979 PgStat_SnapshotEntry
*entry
= NULL
;
982 entry
= pgstat_snapshot_insert(pgStatLocal
.snapshot
.stats
, key
, &found
);
990 * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
991 * otherwise we could quickly end up with a fair bit of memory used due to
994 if (pgstat_fetch_consistency
== PGSTAT_FETCH_CONSISTENCY_NONE
)
995 stats_data
= palloc(kind_info
->shared_data_len
);
997 stats_data
= MemoryContextAlloc(pgStatLocal
.snapshot
.context
,
998 kind_info
->shared_data_len
);
1000 pgstat_lock_entry_shared(entry_ref
, false);
1002 pgstat_get_entry_data(kind
, entry_ref
->shared_stats
),
1003 kind_info
->shared_data_len
);
1004 pgstat_unlock_entry(entry_ref
);
1006 if (pgstat_fetch_consistency
> PGSTAT_FETCH_CONSISTENCY_NONE
)
1008 PgStat_SnapshotEntry
*entry
= NULL
;
1011 entry
= pgstat_snapshot_insert(pgStatLocal
.snapshot
.stats
, key
, &found
);
1012 entry
->data
= stats_data
;
1019 * If a stats snapshot has been taken, return the timestamp at which that was
1020 * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
1024 pgstat_get_stat_snapshot_timestamp(bool *have_snapshot
)
1026 if (force_stats_snapshot_clear
)
1027 pgstat_clear_snapshot();
1029 if (pgStatLocal
.snapshot
.mode
== PGSTAT_FETCH_CONSISTENCY_SNAPSHOT
)
1031 *have_snapshot
= true;
1032 return pgStatLocal
.snapshot
.snapshot_timestamp
;
1035 *have_snapshot
= false;
1041 pgstat_have_entry(PgStat_Kind kind
, Oid dboid
, uint64 objid
)
1043 /* fixed-numbered stats always exist */
1044 if (pgstat_get_kind_info(kind
)->fixed_amount
)
1047 return pgstat_get_entry_ref(kind
, dboid
, objid
, false, NULL
) != NULL
;
1051 * Ensure snapshot for fixed-numbered 'kind' exists.
1053 * Typically used by the pgstat_fetch_* functions for a kind of stats, before
1054 * massaging the data into the desired format.
1057 pgstat_snapshot_fixed(PgStat_Kind kind
)
1059 Assert(pgstat_is_kind_valid(kind
));
1060 Assert(pgstat_get_kind_info(kind
)->fixed_amount
);
1062 if (force_stats_snapshot_clear
)
1063 pgstat_clear_snapshot();
1065 if (pgstat_fetch_consistency
== PGSTAT_FETCH_CONSISTENCY_SNAPSHOT
)
1066 pgstat_build_snapshot();
1068 pgstat_build_snapshot_fixed(kind
);
1070 if (pgstat_is_kind_builtin(kind
))
1071 Assert(pgStatLocal
.snapshot
.fixed_valid
[kind
]);
1072 else if (pgstat_is_kind_custom(kind
))
1073 Assert(pgStatLocal
.snapshot
.custom_valid
[kind
- PGSTAT_KIND_CUSTOM_MIN
]);
1077 pgstat_init_snapshot_fixed(void)
1080 * Initialize fixed-numbered statistics data in snapshots, only for custom
1083 for (PgStat_Kind kind
= PGSTAT_KIND_CUSTOM_MIN
; kind
<= PGSTAT_KIND_CUSTOM_MAX
; kind
++)
1085 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
1087 if (!kind_info
|| !kind_info
->fixed_amount
)
1090 pgStatLocal
.snapshot
.custom_data
[kind
- PGSTAT_KIND_CUSTOM_MIN
] =
1091 MemoryContextAlloc(TopMemoryContext
, kind_info
->shared_data_len
);
1096 pgstat_prep_snapshot(void)
1098 if (force_stats_snapshot_clear
)
1099 pgstat_clear_snapshot();
1101 if (pgstat_fetch_consistency
== PGSTAT_FETCH_CONSISTENCY_NONE
||
1102 pgStatLocal
.snapshot
.stats
!= NULL
)
1105 if (!pgStatLocal
.snapshot
.context
)
1106 pgStatLocal
.snapshot
.context
= AllocSetContextCreate(TopMemoryContext
,
1108 ALLOCSET_SMALL_SIZES
);
1110 pgStatLocal
.snapshot
.stats
=
1111 pgstat_snapshot_create(pgStatLocal
.snapshot
.context
,
1112 PGSTAT_SNAPSHOT_HASH_SIZE
,
1117 pgstat_build_snapshot(void)
1119 dshash_seq_status hstat
;
1120 PgStatShared_HashEntry
*p
;
1122 /* should only be called when we need a snapshot */
1123 Assert(pgstat_fetch_consistency
== PGSTAT_FETCH_CONSISTENCY_SNAPSHOT
);
1125 /* snapshot already built */
1126 if (pgStatLocal
.snapshot
.mode
== PGSTAT_FETCH_CONSISTENCY_SNAPSHOT
)
1129 pgstat_prep_snapshot();
1131 Assert(pgStatLocal
.snapshot
.stats
->members
== 0);
1133 pgStatLocal
.snapshot
.snapshot_timestamp
= GetCurrentTimestamp();
1136 * Snapshot all variable stats.
1138 dshash_seq_init(&hstat
, pgStatLocal
.shared_hash
, false);
1139 while ((p
= dshash_seq_next(&hstat
)) != NULL
)
1141 PgStat_Kind kind
= p
->key
.kind
;
1142 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
1144 PgStat_SnapshotEntry
*entry
;
1145 PgStatShared_Common
*stats_data
;
1148 * Check if the stats object should be included in the snapshot.
1149 * Unless the stats kind can be accessed from all databases (e.g.,
1150 * database stats themselves), we only include stats for the current
1151 * database or objects not associated with a database (e.g. shared
1154 if (p
->key
.dboid
!= MyDatabaseId
&&
1155 p
->key
.dboid
!= InvalidOid
&&
1156 !kind_info
->accessed_across_databases
)
1162 Assert(pg_atomic_read_u32(&p
->refcount
) > 0);
1164 stats_data
= dsa_get_address(pgStatLocal
.dsa
, p
->body
);
1167 entry
= pgstat_snapshot_insert(pgStatLocal
.snapshot
.stats
, p
->key
, &found
);
1170 entry
->data
= MemoryContextAlloc(pgStatLocal
.snapshot
.context
,
1171 kind_info
->shared_size
);
1174 * Acquire the LWLock directly instead of using
1175 * pg_stat_lock_entry_shared() which requires a reference.
1177 LWLockAcquire(&stats_data
->lock
, LW_SHARED
);
1179 pgstat_get_entry_data(kind
, stats_data
),
1180 kind_info
->shared_size
);
1181 LWLockRelease(&stats_data
->lock
);
1183 dshash_seq_term(&hstat
);
1186 * Build snapshot of all fixed-numbered stats.
1188 for (PgStat_Kind kind
= PGSTAT_KIND_MIN
; kind
<= PGSTAT_KIND_MAX
; kind
++)
1190 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
1194 if (!kind_info
->fixed_amount
)
1196 Assert(kind_info
->snapshot_cb
== NULL
);
1200 pgstat_build_snapshot_fixed(kind
);
1203 pgStatLocal
.snapshot
.mode
= PGSTAT_FETCH_CONSISTENCY_SNAPSHOT
;
1207 pgstat_build_snapshot_fixed(PgStat_Kind kind
)
1209 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
1213 /* Position in fixed_valid or custom_valid */
1214 if (pgstat_is_kind_builtin(kind
))
1217 valid
= pgStatLocal
.snapshot
.fixed_valid
;
1221 idx
= kind
- PGSTAT_KIND_CUSTOM_MIN
;
1222 valid
= pgStatLocal
.snapshot
.custom_valid
;
1225 Assert(kind_info
->fixed_amount
);
1226 Assert(kind_info
->snapshot_cb
!= NULL
);
1228 if (pgstat_fetch_consistency
== PGSTAT_FETCH_CONSISTENCY_NONE
)
1230 /* rebuild every time */
1233 else if (valid
[idx
])
1235 /* in snapshot mode we shouldn't get called again */
1236 Assert(pgstat_fetch_consistency
== PGSTAT_FETCH_CONSISTENCY_CACHE
);
1240 Assert(!valid
[idx
]);
1242 kind_info
->snapshot_cb();
1244 Assert(!valid
[idx
]);
1249 /* ------------------------------------------------------------
1250 * Backend-local pending stats infrastructure
1251 * ------------------------------------------------------------
1255 * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
1256 * stats if not already done.
1258 * If created_entry is non-NULL, it'll be set to true if the entry is newly
1259 * created, false otherwise.
1262 pgstat_prep_pending_entry(PgStat_Kind kind
, Oid dboid
, uint64 objid
, bool *created_entry
)
1264 PgStat_EntryRef
*entry_ref
;
1266 /* need to be able to flush out */
1267 Assert(pgstat_get_kind_info(kind
)->flush_pending_cb
!= NULL
);
1269 if (unlikely(!pgStatPendingContext
))
1271 pgStatPendingContext
=
1272 AllocSetContextCreate(TopMemoryContext
,
1274 ALLOCSET_SMALL_SIZES
);
1277 entry_ref
= pgstat_get_entry_ref(kind
, dboid
, objid
,
1278 true, created_entry
);
1280 if (entry_ref
->pending
== NULL
)
1282 size_t entrysize
= pgstat_get_kind_info(kind
)->pending_size
;
1284 Assert(entrysize
!= (size_t) -1);
1286 entry_ref
->pending
= MemoryContextAllocZero(pgStatPendingContext
, entrysize
);
1287 dlist_push_tail(&pgStatPending
, &entry_ref
->pending_node
);
1294 * Return an existing stats entry, or NULL.
1296 * This should only be used for helper function for pgstatfuncs.c - outside of
1297 * that it shouldn't be needed.
1300 pgstat_fetch_pending_entry(PgStat_Kind kind
, Oid dboid
, uint64 objid
)
1302 PgStat_EntryRef
*entry_ref
;
1304 entry_ref
= pgstat_get_entry_ref(kind
, dboid
, objid
, false, NULL
);
1306 if (entry_ref
== NULL
|| entry_ref
->pending
== NULL
)
1313 pgstat_delete_pending_entry(PgStat_EntryRef
*entry_ref
)
1315 PgStat_Kind kind
= entry_ref
->shared_entry
->key
.kind
;
1316 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
1317 void *pending_data
= entry_ref
->pending
;
1319 Assert(pending_data
!= NULL
);
1320 /* !fixed_amount stats should be handled explicitly */
1321 Assert(!pgstat_get_kind_info(kind
)->fixed_amount
);
1323 if (kind_info
->delete_pending_cb
)
1324 kind_info
->delete_pending_cb(entry_ref
);
1326 pfree(pending_data
);
1327 entry_ref
->pending
= NULL
;
1329 dlist_delete(&entry_ref
->pending_node
);
1333 * Flush out pending stats for database objects (databases, relations,
1337 pgstat_flush_pending_entries(bool nowait
)
1339 bool have_pending
= false;
1340 dlist_node
*cur
= NULL
;
1343 * Need to be a bit careful iterating over the list of pending entries.
1344 * Processing a pending entry may queue further pending entries to the end
1345 * of the list that we want to process, so a simple iteration won't do.
1346 * Further complicating matters is that we want to delete the current
1347 * entry in each iteration from the list if we flushed successfully.
1349 * So we just keep track of the next pointer in each loop iteration.
1351 if (!dlist_is_empty(&pgStatPending
))
1352 cur
= dlist_head_node(&pgStatPending
);
1356 PgStat_EntryRef
*entry_ref
=
1357 dlist_container(PgStat_EntryRef
, pending_node
, cur
);
1358 PgStat_HashKey key
= entry_ref
->shared_entry
->key
;
1359 PgStat_Kind kind
= key
.kind
;
1360 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
1364 Assert(!kind_info
->fixed_amount
);
1365 Assert(kind_info
->flush_pending_cb
!= NULL
);
1367 /* flush the stats, if possible */
1368 did_flush
= kind_info
->flush_pending_cb(entry_ref
, nowait
);
1370 Assert(did_flush
|| nowait
);
1372 /* determine next entry, before deleting the pending entry */
1373 if (dlist_has_next(&pgStatPending
, cur
))
1374 next
= dlist_next_node(&pgStatPending
, cur
);
1378 /* if successfully flushed, remove entry */
1380 pgstat_delete_pending_entry(entry_ref
);
1382 have_pending
= true;
1387 Assert(dlist_is_empty(&pgStatPending
) == !have_pending
);
1389 return have_pending
;
1393 /* ------------------------------------------------------------
1394 * Helper / infrastructure functions
1395 * ------------------------------------------------------------
1399 pgstat_get_kind_from_str(char *kind_str
)
1401 for (PgStat_Kind kind
= PGSTAT_KIND_BUILTIN_MIN
; kind
<= PGSTAT_KIND_BUILTIN_MAX
; kind
++)
1403 if (pg_strcasecmp(kind_str
, pgstat_kind_builtin_infos
[kind
].name
) == 0)
1407 /* Check the custom set of cumulative stats */
1408 if (pgstat_kind_custom_infos
)
1410 for (PgStat_Kind kind
= PGSTAT_KIND_CUSTOM_MIN
; kind
<= PGSTAT_KIND_CUSTOM_MAX
; kind
++)
1412 uint32 idx
= kind
- PGSTAT_KIND_CUSTOM_MIN
;
1414 if (pgstat_kind_custom_infos
[idx
] &&
1415 pg_strcasecmp(kind_str
, pgstat_kind_custom_infos
[idx
]->name
) == 0)
1421 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
1422 errmsg("invalid statistics kind: \"%s\"", kind_str
)));
1423 return PGSTAT_KIND_INVALID
; /* avoid compiler warnings */
1427 pgstat_is_kind_valid(PgStat_Kind kind
)
1429 return pgstat_is_kind_builtin(kind
) || pgstat_is_kind_custom(kind
);
1432 const PgStat_KindInfo
*
1433 pgstat_get_kind_info(PgStat_Kind kind
)
1435 if (pgstat_is_kind_builtin(kind
))
1436 return &pgstat_kind_builtin_infos
[kind
];
1438 if (pgstat_is_kind_custom(kind
))
1440 uint32 idx
= kind
- PGSTAT_KIND_CUSTOM_MIN
;
1442 if (pgstat_kind_custom_infos
== NULL
||
1443 pgstat_kind_custom_infos
[idx
] == NULL
)
1445 return pgstat_kind_custom_infos
[idx
];
1452 * Register a new stats kind.
1454 * PgStat_Kinds must be globally unique across all extensions. Refer
1455 * to https://wiki.postgresql.org/wiki/CustomCumulativeStats to reserve a
1456 * unique ID for your extension, to avoid conflicts with other extension
1457 * developers. During development, use PGSTAT_KIND_EXPERIMENTAL to avoid
1458 * needlessly reserving a new ID.
1461 pgstat_register_kind(PgStat_Kind kind
, const PgStat_KindInfo
*kind_info
)
1463 uint32 idx
= kind
- PGSTAT_KIND_CUSTOM_MIN
;
1465 if (kind_info
->name
== NULL
|| strlen(kind_info
->name
) == 0)
1467 (errmsg("custom cumulative statistics name is invalid"),
1468 errhint("Provide a non-empty name for the custom cumulative statistics.")));
1470 if (!pgstat_is_kind_custom(kind
))
1471 ereport(ERROR
, (errmsg("custom cumulative statistics ID %u is out of range", kind
),
1472 errhint("Provide a custom cumulative statistics ID between %u and %u.",
1473 PGSTAT_KIND_CUSTOM_MIN
, PGSTAT_KIND_CUSTOM_MAX
)));
1475 if (!process_shared_preload_libraries_in_progress
)
1477 (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info
->name
, kind
),
1478 errdetail("Custom cumulative statistics must be registered while initializing modules in \"shared_preload_libraries\".")));
1481 * Check some data for fixed-numbered stats.
1483 if (kind_info
->fixed_amount
)
1485 if (kind_info
->shared_size
== 0)
1487 (errmsg("custom cumulative statistics property is invalid"),
1488 errhint("Custom cumulative statistics require a shared memory size for fixed-numbered objects.")));
1492 * If pgstat_kind_custom_infos is not available yet, allocate it.
1494 if (pgstat_kind_custom_infos
== NULL
)
1496 pgstat_kind_custom_infos
= (const PgStat_KindInfo
**)
1497 MemoryContextAllocZero(TopMemoryContext
,
1498 sizeof(PgStat_KindInfo
*) * PGSTAT_KIND_CUSTOM_SIZE
);
1501 if (pgstat_kind_custom_infos
[idx
] != NULL
&&
1502 pgstat_kind_custom_infos
[idx
]->name
!= NULL
)
1504 (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info
->name
, kind
),
1505 errdetail("Custom cumulative statistics \"%s\" already registered with the same ID.",
1506 pgstat_kind_custom_infos
[idx
]->name
)));
1508 /* check for existing custom stats with the same name */
1509 for (PgStat_Kind existing_kind
= PGSTAT_KIND_CUSTOM_MIN
; existing_kind
<= PGSTAT_KIND_CUSTOM_MAX
; existing_kind
++)
1511 uint32 existing_idx
= existing_kind
- PGSTAT_KIND_CUSTOM_MIN
;
1513 if (pgstat_kind_custom_infos
[existing_idx
] == NULL
)
1515 if (!pg_strcasecmp(pgstat_kind_custom_infos
[existing_idx
]->name
, kind_info
->name
))
1517 (errmsg("failed to register custom cumulative statistics \"%s\" with ID %u", kind_info
->name
, kind
),
1518 errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind
)));
1522 pgstat_kind_custom_infos
[idx
] = kind_info
;
1524 (errmsg("registered custom cumulative statistics \"%s\" with ID %u",
1525 kind_info
->name
, kind
)));
/*
 * Stats should only be reported after pgstat_initialize() and before
 * pgstat_shutdown(). This check is put in a few central places to catch
 * violations of this rule more easily.
 *
 * Only compiled in assert-enabled builds.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif
/* ------------------------------------------------------------
 * reading and writing of on-disk stats file
 * ------------------------------------------------------------
 */

/* helpers for pgstat_write_statsfile() */

/*
 * Write "len" bytes starting at "ptr" to "fpout" as a single item.
 *
 * Errors are deliberately not reported here; the caller checks the stream
 * with ferror() once all output has been issued.
 */
static void
write_chunk(FILE *fpout, void *ptr, size_t len)
{
	size_t		rc;

	/* fwrite() returns a size_t item count, so keep it in a size_t */
	rc = fwrite(ptr, len, 1, fpout);

	/* we'll check for errors with ferror once at the end */
	(void) rc;
}

/* write the object "ptr" points to, sized by its own type */
#define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
1562 * This function is called in the last process that is accessing the shared
1563 * stats so locking is not required.
1566 pgstat_write_statsfile(XLogRecPtr redo
)
1570 const char *tmpfile
= PGSTAT_STAT_PERMANENT_TMPFILE
;
1571 const char *statfile
= PGSTAT_STAT_PERMANENT_FILENAME
;
1572 dshash_seq_status hstat
;
1573 PgStatShared_HashEntry
*ps
;
1575 pgstat_assert_is_up();
1577 /* should be called only by the checkpointer or single user mode */
1578 Assert(!IsUnderPostmaster
|| MyBackendType
== B_CHECKPOINTER
);
1580 /* we're shutting down, so it's ok to just override this */
1581 pgstat_fetch_consistency
= PGSTAT_FETCH_CONSISTENCY_NONE
;
1583 elog(DEBUG2
, "writing stats file \"%s\" with redo %X/%X", statfile
,
1584 LSN_FORMAT_ARGS(redo
));
1587 * Open the statistics temp file to write out the current values.
1589 fpout
= AllocateFile(tmpfile
, PG_BINARY_W
);
1593 (errcode_for_file_access(),
1594 errmsg("could not open temporary statistics file \"%s\": %m",
1600 * Write the file header --- currently just a format ID.
1602 format_id
= PGSTAT_FILE_FORMAT_ID
;
1603 write_chunk_s(fpout
, &format_id
);
1605 /* Write the redo LSN, used to cross check the file read */
1606 write_chunk_s(fpout
, &redo
);
1608 /* Write various stats structs for fixed number of objects */
1609 for (PgStat_Kind kind
= PGSTAT_KIND_MIN
; kind
<= PGSTAT_KIND_MAX
; kind
++)
1612 const PgStat_KindInfo
*info
= pgstat_get_kind_info(kind
);
1614 if (!info
|| !info
->fixed_amount
)
1617 if (pgstat_is_kind_builtin(kind
))
1618 Assert(info
->snapshot_ctl_off
!= 0);
1620 pgstat_build_snapshot_fixed(kind
);
1621 if (pgstat_is_kind_builtin(kind
))
1622 ptr
= ((char *) &pgStatLocal
.snapshot
) + info
->snapshot_ctl_off
;
1624 ptr
= pgStatLocal
.snapshot
.custom_data
[kind
- PGSTAT_KIND_CUSTOM_MIN
];
1626 fputc(PGSTAT_FILE_ENTRY_FIXED
, fpout
);
1627 write_chunk_s(fpout
, &kind
);
1628 write_chunk(fpout
, ptr
, info
->shared_data_len
);
1632 * Walk through the stats entries
1634 dshash_seq_init(&hstat
, pgStatLocal
.shared_hash
, false);
1635 while ((ps
= dshash_seq_next(&hstat
)) != NULL
)
1637 PgStatShared_Common
*shstats
;
1638 const PgStat_KindInfo
*kind_info
= NULL
;
1640 CHECK_FOR_INTERRUPTS();
1642 /* we may have some "dropped" entries not yet removed, skip them */
1643 Assert(!ps
->dropped
);
1648 * This discards data related to custom stats kinds that are unknown
1651 if (!pgstat_is_kind_valid(ps
->key
.kind
))
1653 elog(WARNING
, "found unknown stats entry %u/%u/%llu",
1654 ps
->key
.kind
, ps
->key
.dboid
,
1655 (unsigned long long) ps
->key
.objid
);
1659 shstats
= (PgStatShared_Common
*) dsa_get_address(pgStatLocal
.dsa
, ps
->body
);
1661 kind_info
= pgstat_get_kind_info(ps
->key
.kind
);
1663 /* if not dropped the valid-entry refcount should exist */
1664 Assert(pg_atomic_read_u32(&ps
->refcount
) > 0);
1666 if (!kind_info
->to_serialized_name
)
1668 /* normal stats entry, identified by PgStat_HashKey */
1669 fputc(PGSTAT_FILE_ENTRY_HASH
, fpout
);
1670 write_chunk_s(fpout
, &ps
->key
);
1674 /* stats entry identified by name on disk (e.g. slots) */
1677 kind_info
->to_serialized_name(&ps
->key
, shstats
, &name
);
1679 fputc(PGSTAT_FILE_ENTRY_NAME
, fpout
);
1680 write_chunk_s(fpout
, &ps
->key
.kind
);
1681 write_chunk_s(fpout
, &name
);
1684 /* Write except the header part of the entry */
1686 pgstat_get_entry_data(ps
->key
.kind
, shstats
),
1687 pgstat_get_entry_len(ps
->key
.kind
));
1689 dshash_seq_term(&hstat
);
1692 * No more output to be done. Close the temp file and replace the old
1693 * pgstat.stat with it. The ferror() check replaces testing for error
1694 * after each individual fputc or fwrite (in write_chunk()) above.
1696 fputc(PGSTAT_FILE_ENTRY_END
, fpout
);
1701 (errcode_for_file_access(),
1702 errmsg("could not write temporary statistics file \"%s\": %m",
1707 else if (FreeFile(fpout
) < 0)
1710 (errcode_for_file_access(),
1711 errmsg("could not close temporary statistics file \"%s\": %m",
1715 else if (durable_rename(tmpfile
, statfile
, LOG
) < 0)
1717 /* durable_rename already emitted log message */
/* helpers for pgstat_read_statsfile() */

/*
 * Read exactly "len" bytes from "fpin" into "ptr".
 *
 * Returns true only if all "len" bytes could be read.
 */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
	size_t		nread = fread(ptr, 1, len, fpin);

	return nread == len;
}

/* read into the object "ptr" points to, sized by its own type */
#define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr))
1732 * Reads in existing statistics file into memory.
1734 * This function is called in the only process that is accessing the shared
1735 * stats so locking is not required.
1738 pgstat_read_statsfile(XLogRecPtr redo
)
1743 const char *statfile
= PGSTAT_STAT_PERMANENT_FILENAME
;
1744 PgStat_ShmemControl
*shmem
= pgStatLocal
.shmem
;
1745 XLogRecPtr file_redo
;
1747 /* shouldn't be called from postmaster */
1748 Assert(IsUnderPostmaster
|| !IsPostmasterEnvironment
);
1750 elog(DEBUG2
, "reading stats file \"%s\" with redo %X/%X", statfile
,
1751 LSN_FORMAT_ARGS(redo
));
1754 * Try to open the stats file. If it doesn't exist, the backends simply
1755 * returns zero for anything and statistics simply starts from scratch
1756 * with empty counters.
1758 * ENOENT is a possibility if stats collection was previously disabled or
1759 * has not yet written the stats file for the first time. Any other
1760 * failure condition is suspicious.
1762 if ((fpin
= AllocateFile(statfile
, PG_BINARY_R
)) == NULL
)
1764 if (errno
!= ENOENT
)
1766 (errcode_for_file_access(),
1767 errmsg("could not open statistics file \"%s\": %m",
1769 pgstat_reset_after_failure();
1774 * Verify it's of the expected format.
1776 if (!read_chunk_s(fpin
, &format_id
))
1778 elog(WARNING
, "could not read format ID");
1782 if (format_id
!= PGSTAT_FILE_FORMAT_ID
)
1784 elog(WARNING
, "found incorrect format ID %d (expected %d)",
1785 format_id
, PGSTAT_FILE_FORMAT_ID
);
1790 * Read the redo LSN stored in the file.
1792 if (!read_chunk_s(fpin
, &file_redo
))
1794 elog(WARNING
, "could not read redo LSN");
1798 if (file_redo
!= redo
)
1800 elog(WARNING
, "found incorrect redo LSN %X/%X (expected %X/%X)",
1801 LSN_FORMAT_ARGS(file_redo
), LSN_FORMAT_ARGS(redo
));
1806 * We found an existing statistics file. Read it and put all the stats
1811 int t
= fgetc(fpin
);
1815 case PGSTAT_FILE_ENTRY_FIXED
:
1818 const PgStat_KindInfo
*info
;
1821 /* entry for fixed-numbered stats */
1822 if (!read_chunk_s(fpin
, &kind
))
1824 elog(WARNING
, "could not read stats kind for entry of type %c", t
);
1828 if (!pgstat_is_kind_valid(kind
))
1830 elog(WARNING
, "invalid stats kind %u for entry of type %c",
1835 info
= pgstat_get_kind_info(kind
);
1838 elog(WARNING
, "could not find information of kind %u for entry of type %c",
1843 if (!info
->fixed_amount
)
1845 elog(WARNING
, "invalid fixed_amount in stats kind %u for entry of type %c",
1850 /* Load back stats into shared memory */
1851 if (pgstat_is_kind_builtin(kind
))
1852 ptr
= ((char *) shmem
) + info
->shared_ctl_off
+
1853 info
->shared_data_off
;
1856 int idx
= kind
- PGSTAT_KIND_CUSTOM_MIN
;
1858 ptr
= ((char *) shmem
->custom_data
[idx
]) +
1859 info
->shared_data_off
;
1862 if (!read_chunk(fpin
, ptr
, info
->shared_data_len
))
1864 elog(WARNING
, "could not read data of stats kind %u for entry of type %c with size %u",
1865 kind
, t
, info
->shared_data_len
);
1871 case PGSTAT_FILE_ENTRY_HASH
:
1872 case PGSTAT_FILE_ENTRY_NAME
:
1875 PgStatShared_HashEntry
*p
;
1876 PgStatShared_Common
*header
;
1878 CHECK_FOR_INTERRUPTS();
1880 if (t
== PGSTAT_FILE_ENTRY_HASH
)
1882 /* normal stats entry, identified by PgStat_HashKey */
1883 if (!read_chunk_s(fpin
, &key
))
1885 elog(WARNING
, "could not read key for entry of type %c", t
);
1889 if (!pgstat_is_kind_valid(key
.kind
))
1891 elog(WARNING
, "invalid stats kind for entry %u/%u/%llu of type %c",
1892 key
.kind
, key
.dboid
,
1893 (unsigned long long) key
.objid
, t
);
1899 /* stats entry identified by name on disk (e.g. slots) */
1900 const PgStat_KindInfo
*kind_info
= NULL
;
1904 if (!read_chunk_s(fpin
, &kind
))
1906 elog(WARNING
, "could not read stats kind for entry of type %c", t
);
1909 if (!read_chunk_s(fpin
, &name
))
1911 elog(WARNING
, "could not read name of stats kind %u for entry of type %c",
1915 if (!pgstat_is_kind_valid(kind
))
1917 elog(WARNING
, "invalid stats kind %u for entry of type %c",
1922 kind_info
= pgstat_get_kind_info(kind
);
1925 elog(WARNING
, "could not find information of kind %u for entry of type %c",
1930 if (!kind_info
->from_serialized_name
)
1932 elog(WARNING
, "invalid from_serialized_name in stats kind %u for entry of type %c",
1937 if (!kind_info
->from_serialized_name(&name
, &key
))
1939 /* skip over data for entry we don't care about */
1940 if (fseek(fpin
, pgstat_get_entry_len(kind
), SEEK_CUR
) != 0)
1942 elog(WARNING
, "could not seek \"%s\" of stats kind %u for entry of type %c",
1943 NameStr(name
), kind
, t
);
1950 Assert(key
.kind
== kind
);
1954 * This intentionally doesn't use pgstat_get_entry_ref() -
1955 * putting all stats into checkpointer's
1956 * pgStatEntryRefHash would be wasted effort and memory.
1958 p
= dshash_find_or_insert(pgStatLocal
.shared_hash
, &key
, &found
);
1960 /* don't allow duplicate entries */
1963 dshash_release_lock(pgStatLocal
.shared_hash
, p
);
1964 elog(WARNING
, "found duplicate stats entry %u/%u/%llu of type %c",
1965 key
.kind
, key
.dboid
,
1966 (unsigned long long) key
.objid
, t
);
1970 header
= pgstat_init_entry(key
.kind
, p
);
1971 dshash_release_lock(pgStatLocal
.shared_hash
, p
);
1973 if (!read_chunk(fpin
,
1974 pgstat_get_entry_data(key
.kind
, header
),
1975 pgstat_get_entry_len(key
.kind
)))
1977 elog(WARNING
, "could not read data for entry %u/%u/%llu of type %c",
1978 key
.kind
, key
.dboid
,
1979 (unsigned long long) key
.objid
, t
);
1985 case PGSTAT_FILE_ENTRY_END
:
1988 * check that PGSTAT_FILE_ENTRY_END actually signals end of
1991 if (fgetc(fpin
) != EOF
)
1993 elog(WARNING
, "could not read end-of-file");
2000 elog(WARNING
, "could not read entry of type %c", t
);
2008 elog(DEBUG2
, "removing permanent stats file \"%s\"", statfile
);
2015 (errmsg("corrupted statistics file \"%s\"", statfile
)));
2017 pgstat_reset_after_failure();
2023 * Helper to reset / drop stats after a crash or after restoring stats from
2024 * disk failed, potentially after already loading parts.
2027 pgstat_reset_after_failure(void)
2029 TimestampTz ts
= GetCurrentTimestamp();
2031 /* reset fixed-numbered stats */
2032 for (PgStat_Kind kind
= PGSTAT_KIND_MIN
; kind
<= PGSTAT_KIND_MAX
; kind
++)
2034 const PgStat_KindInfo
*kind_info
= pgstat_get_kind_info(kind
);
2036 if (!kind_info
|| !kind_info
->fixed_amount
)
2039 kind_info
->reset_all_cb(ts
);
2042 /* and drop variable-numbered ones */
2043 pgstat_drop_all_entries();
2047 * GUC assign_hook for stats_fetch_consistency.
2050 assign_stats_fetch_consistency(int newval
, void *extra
)
2053 * Changing this value in a transaction may cause snapshot state
2054 * inconsistencies, so force a clear of the current snapshot on the next
2055 * snapshot build attempt.
2057 if (pgstat_fetch_consistency
!= newval
)
2058 force_stats_snapshot_clear
= true;