Only skip pages marked as clean in the visibility map, if the last 32
[PostgreSQL.git] / contrib / pg_stat_statements / pg_stat_statements.c
blob81e1dce936da835235635cb71b1c36d2a4971807
/*-------------------------------------------------------------------------
 *
 * pg_stat_statements.c
 *		Track statement execution times across a whole database cluster.
 *
 * Note about locking issues: to create or delete an entry in the shared
 * hashtable, one must hold pgss->lock exclusively.  Modifying any field
 * in an entry except the counters requires the same.  To look up an entry,
 * one must hold the lock shared.  To read or update the counters within
 * an entry, one must hold the lock shared or exclusive (so the entry doesn't
 * disappear!) and also take the entry's mutex spinlock.
 *
 *
 * Copyright (c) 2008-2009, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
21 #include "postgres.h"
23 #include <unistd.h>
25 #include "access/hash.h"
26 #include "catalog/pg_type.h"
27 #include "executor/executor.h"
28 #include "executor/instrument.h"
29 #include "mb/pg_wchar.h"
30 #include "miscadmin.h"
31 #include "pgstat.h"
32 #include "storage/fd.h"
33 #include "storage/ipc.h"
34 #include "storage/spin.h"
35 #include "utils/builtins.h"
36 #include "utils/hsearch.h"
37 #include "utils/guc.h"
40 PG_MODULE_MAGIC;
42 /* Location of stats file */
43 #define PGSS_DUMP_FILE "global/pg_stat_statements.stat"
45 /* This constant defines the magic number in the stats file header */
46 static const uint32 PGSS_FILE_HEADER = 0x20081202;
48 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
49 #define USAGE_EXEC(duration) (1.0)
50 #define USAGE_INIT (1.0) /* including initial planning */
51 #define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
52 #define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
55 * Hashtable key that defines the identity of a hashtable entry. The
56 * hash comparators do not assume that the query string is null-terminated;
57 * this lets us search for an mbcliplen'd string without copying it first.
59 * Presently, the query encoding is fully determined by the source database
60 * and so we don't really need it to be in the key. But that might not always
61 * be true. Anyway it's notationally convenient to pass it as part of the key.
63 typedef struct pgssHashKey
65 Oid userid; /* user OID */
66 Oid dbid; /* database OID */
67 int encoding; /* query encoding */
68 int query_len; /* # of valid bytes in query string */
69 const char *query_ptr; /* query string proper */
70 } pgssHashKey;
73 * The actual stats counters kept within pgssEntry.
75 typedef struct Counters
77 int64 calls; /* # of times executed */
78 double total_time; /* total execution time in seconds */
79 int64 rows; /* total # of retrieved or affected rows */
80 double usage; /* usage factor */
81 } Counters;
84 * Statistics per statement
86 * NB: see the file read/write code before changing field order here.
88 typedef struct pgssEntry
90 pgssHashKey key; /* hash key of entry - MUST BE FIRST */
91 Counters counters; /* the statistics for this query */
92 slock_t mutex; /* protects the counters only */
93 char query[1]; /* VARIABLE LENGTH ARRAY - MUST BE LAST */
94 /* Note: the allocated length of query[] is actually pgss->query_size */
95 } pgssEntry;
98 * Global shared state
100 typedef struct pgssSharedState
102 LWLockId lock; /* protects hashtable search/modification */
103 int query_size; /* max query length in bytes */
104 } pgssSharedState;
106 /*---- Local variables ----*/
108 /* Current nesting depth of ExecutorRun calls */
109 static int nested_level = 0;
110 /* Saved hook values in case of unload */
111 static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
112 static ExecutorStart_hook_type prev_ExecutorStart = NULL;
113 static ExecutorRun_hook_type prev_ExecutorRun = NULL;
114 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
115 /* Links to shared memory state */
116 static pgssSharedState *pgss = NULL;
117 static HTAB *pgss_hash = NULL;
119 /*---- GUC variables ----*/
121 typedef enum
123 PGSS_TRACK_NONE, /* track no statements */
124 PGSS_TRACK_TOP, /* only top level statements */
125 PGSS_TRACK_ALL, /* all statements, including nested ones */
126 } PGSSTrackLevel;
128 static const struct config_enum_entry track_options[] = {
129 {"none", PGSS_TRACK_NONE, false},
130 {"top", PGSS_TRACK_TOP, false},
131 {"all", PGSS_TRACK_ALL, false},
132 {NULL, 0, false}
135 static int pgss_max; /* max # statements to track */
136 static int pgss_track; /* tracking level */
137 static bool pgss_save; /* whether to save stats across shutdown */
140 #define pgss_enabled() \
141 (pgss_track == PGSS_TRACK_ALL || \
142 (pgss_track == PGSS_TRACK_TOP && nested_level == 0))
144 /*---- Function declarations ----*/
146 void _PG_init(void);
147 void _PG_fini(void);
149 Datum pg_stat_statements_reset(PG_FUNCTION_ARGS);
150 Datum pg_stat_statements(PG_FUNCTION_ARGS);
152 PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
153 PG_FUNCTION_INFO_V1(pg_stat_statements);
155 static void pgss_shmem_startup(void);
156 static void pgss_shmem_shutdown(int code, Datum arg);
157 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
158 static void pgss_ExecutorRun(QueryDesc *queryDesc,
159 ScanDirection direction,
160 long count);
161 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
162 static uint32 pgss_hash_fn(const void *key, Size keysize);
163 static int pgss_match_fn(const void *key1, const void *key2, Size keysize);
164 static void pgss_store(const char *query,
165 const Instrumentation *instr, uint32 rows);
166 static Size pgss_memsize(void);
167 static pgssEntry *entry_alloc(pgssHashKey *key);
168 static void entry_dealloc(void);
169 static void entry_reset(void);
173 * Module load callback
175 void
176 _PG_init(void)
179 * In order to create our shared memory area, we have to be loaded via
180 * shared_preload_libraries. If not, fall out without hooking into
181 * any of the main system. (We don't throw error here because it seems
182 * useful to allow the pg_stat_statements functions to be created even
183 * when the module isn't active. The functions must protect themselves
184 * against being called then, however.)
186 if (!process_shared_preload_libraries_in_progress)
187 return;
190 * Define (or redefine) custom GUC variables.
192 DefineCustomIntVariable("pg_stat_statements.max",
193 "Sets the maximum number of statements tracked by pg_stat_statements.",
194 NULL,
195 &pgss_max,
196 1000,
197 100,
198 INT_MAX,
199 PGC_POSTMASTER,
201 NULL,
202 NULL);
204 DefineCustomEnumVariable("pg_stat_statements.track",
205 "Selects which statements are tracked by pg_stat_statements.",
206 NULL,
207 &pgss_track,
208 PGSS_TRACK_TOP,
209 track_options,
210 PGC_SUSET,
212 NULL,
213 NULL);
215 DefineCustomBoolVariable("pg_stat_statements.save",
216 "Save pg_stat_statements statistics across server shutdowns.",
217 NULL,
218 &pgss_save,
219 true,
220 PGC_SIGHUP,
222 NULL,
223 NULL);
225 EmitWarningsOnPlaceholders("pg_stat_statements");
228 * Request additional shared resources. (These are no-ops if we're not in
229 * the postmaster process.) We'll allocate or attach to the shared
230 * resources in pgss_shmem_startup().
232 RequestAddinShmemSpace(pgss_memsize());
233 RequestAddinLWLocks(1);
236 * Install hooks.
238 prev_shmem_startup_hook = shmem_startup_hook;
239 shmem_startup_hook = pgss_shmem_startup;
240 prev_ExecutorStart = ExecutorStart_hook;
241 ExecutorStart_hook = pgss_ExecutorStart;
242 prev_ExecutorRun = ExecutorRun_hook;
243 ExecutorRun_hook = pgss_ExecutorRun;
244 prev_ExecutorEnd = ExecutorEnd_hook;
245 ExecutorEnd_hook = pgss_ExecutorEnd;
249 * Module unload callback
251 void
252 _PG_fini(void)
254 /* Uninstall hooks. */
255 ExecutorStart_hook = prev_ExecutorStart;
256 ExecutorRun_hook = prev_ExecutorRun;
257 ExecutorEnd_hook = prev_ExecutorEnd;
258 shmem_startup_hook = prev_shmem_startup_hook;
262 * shmem_startup hook: allocate or attach to shared memory,
263 * then load any pre-existing statistics from file.
265 static void
266 pgss_shmem_startup(void)
268 bool found;
269 HASHCTL info;
270 FILE *file;
271 uint32 header;
272 int32 num;
273 int32 i;
274 int query_size;
275 int buffer_size;
276 char *buffer = NULL;
278 if (prev_shmem_startup_hook)
279 prev_shmem_startup_hook();
281 /* reset in case this is a restart within the postmaster */
282 pgss = NULL;
283 pgss_hash = NULL;
286 * Create or attach to the shared memory state, including hash table
288 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
290 pgss = ShmemInitStruct("pg_stat_statements",
291 sizeof(pgssSharedState),
292 &found);
293 if (!pgss)
294 elog(ERROR, "out of shared memory");
296 if (!found)
298 /* First time through ... */
299 pgss->lock = LWLockAssign();
300 pgss->query_size = pgstat_track_activity_query_size;
303 /* Be sure everyone agrees on the hash table entry size */
304 query_size = pgss->query_size;
306 memset(&info, 0, sizeof(info));
307 info.keysize = sizeof(pgssHashKey);
308 info.entrysize = offsetof(pgssEntry, query) + query_size;
309 info.hash = pgss_hash_fn;
310 info.match = pgss_match_fn;
311 pgss_hash = ShmemInitHash("pg_stat_statements hash",
312 pgss_max, pgss_max,
313 &info,
314 HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
315 if (!pgss_hash)
316 elog(ERROR, "out of shared memory");
318 LWLockRelease(AddinShmemInitLock);
321 * If we're in the postmaster (or a standalone backend...), set up a
322 * shmem exit hook to dump the statistics to disk.
324 if (!IsUnderPostmaster)
325 on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
328 * Attempt to load old statistics from the dump file.
330 * Note: we don't bother with locks here, because there should be no
331 * other processes running when this is called.
333 if (!pgss_save)
334 return;
336 file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
337 if (file == NULL)
339 if (errno == ENOENT)
340 return; /* ignore not-found error */
341 goto error;
344 buffer_size = query_size;
345 buffer = (char *) palloc(buffer_size);
347 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
348 header != PGSS_FILE_HEADER ||
349 fread(&num, sizeof(int32), 1, file) != 1)
350 goto error;
352 for (i = 0; i < num; i++)
354 pgssEntry temp;
355 pgssEntry *entry;
357 if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1)
358 goto error;
360 /* Encoding is the only field we can easily sanity-check */
361 if (!PG_VALID_BE_ENCODING(temp.key.encoding))
362 goto error;
364 /* Previous incarnation might have had a larger query_size */
365 if (temp.key.query_len >= buffer_size)
367 buffer = (char *) repalloc(buffer, temp.key.query_len + 1);
368 buffer_size = temp.key.query_len + 1;
371 if (fread(buffer, 1, temp.key.query_len, file) != temp.key.query_len)
372 goto error;
373 buffer[temp.key.query_len] = '\0';
375 /* Clip to available length if needed */
376 if (temp.key.query_len >= query_size)
377 temp.key.query_len = pg_encoding_mbcliplen(temp.key.encoding,
378 buffer,
379 temp.key.query_len,
380 query_size - 1);
381 temp.key.query_ptr = buffer;
383 /* make the hashtable entry (discards old entries if too many) */
384 entry = entry_alloc(&temp.key);
386 /* copy in the actual stats */
387 entry->counters = temp.counters;
390 pfree(buffer);
391 FreeFile(file);
392 return;
394 error:
395 ereport(LOG,
396 (errcode_for_file_access(),
397 errmsg("could not read pg_stat_statement file \"%s\": %m",
398 PGSS_DUMP_FILE)));
399 if (buffer)
400 pfree(buffer);
401 if (file)
402 FreeFile(file);
403 /* If possible, throw away the bogus file; ignore any error */
404 unlink(PGSS_DUMP_FILE);
408 * shmem_shutdown hook: Dump statistics into file.
410 * Note: we don't bother with acquiring lock, because there should be no
411 * other processes running when this is called.
413 static void
414 pgss_shmem_shutdown(int code, Datum arg)
416 FILE *file;
417 HASH_SEQ_STATUS hash_seq;
418 int32 num_entries;
419 pgssEntry *entry;
421 /* Don't try to dump during a crash. */
422 if (code)
423 return;
425 /* Safety check ... shouldn't get here unless shmem is set up. */
426 if (!pgss || !pgss_hash)
427 return;
429 /* Don't dump if told not to. */
430 if (!pgss_save)
431 return;
433 file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_W);
434 if (file == NULL)
435 goto error;
437 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
438 goto error;
439 num_entries = hash_get_num_entries(pgss_hash);
440 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
441 goto error;
443 hash_seq_init(&hash_seq, pgss_hash);
444 while ((entry = hash_seq_search(&hash_seq)) != NULL)
446 int len = entry->key.query_len;
448 if (fwrite(entry, offsetof(pgssEntry, mutex), 1, file) != 1 ||
449 fwrite(entry->query, 1, len, file) != len)
450 goto error;
453 if (FreeFile(file))
455 file = NULL;
456 goto error;
459 return;
461 error:
462 ereport(LOG,
463 (errcode_for_file_access(),
464 errmsg("could not write pg_stat_statement file \"%s\": %m",
465 PGSS_DUMP_FILE)));
466 if (file)
467 FreeFile(file);
468 unlink(PGSS_DUMP_FILE);
472 * ExecutorStart hook: start up tracking if needed
474 static void
475 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
477 if (prev_ExecutorStart)
478 prev_ExecutorStart(queryDesc, eflags);
479 else
480 standard_ExecutorStart(queryDesc, eflags);
482 if (pgss_enabled())
485 * Set up to track total elapsed time in ExecutorRun. Make sure
486 * the space is allocated in the per-query context so it will go
487 * away at ExecutorEnd.
489 if (queryDesc->totaltime == NULL)
491 MemoryContext oldcxt;
493 oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
494 queryDesc->totaltime = InstrAlloc(1);
495 MemoryContextSwitchTo(oldcxt);
501 * ExecutorRun hook: all we need do is track nesting depth
503 static void
504 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
506 nested_level++;
507 PG_TRY();
509 if (prev_ExecutorRun)
510 prev_ExecutorRun(queryDesc, direction, count);
511 else
512 standard_ExecutorRun(queryDesc, direction, count);
513 nested_level--;
515 PG_CATCH();
517 nested_level--;
518 PG_RE_THROW();
520 PG_END_TRY();
524 * ExecutorEnd hook: store results if needed
526 static void
527 pgss_ExecutorEnd(QueryDesc *queryDesc)
529 if (queryDesc->totaltime && pgss_enabled())
532 * Make sure stats accumulation is done. (Note: it's okay if
533 * several levels of hook all do this.)
535 InstrEndLoop(queryDesc->totaltime);
537 pgss_store(queryDesc->sourceText,
538 queryDesc->totaltime,
539 queryDesc->estate->es_processed);
542 if (prev_ExecutorEnd)
543 prev_ExecutorEnd(queryDesc);
544 else
545 standard_ExecutorEnd(queryDesc);
549 * Calculate hash value for a key
551 static uint32
552 pgss_hash_fn(const void *key, Size keysize)
554 const pgssHashKey *k = (const pgssHashKey *) key;
556 /* we don't bother to include encoding in the hash */
557 return hash_uint32((uint32) k->userid) ^
558 hash_uint32((uint32) k->dbid) ^
559 DatumGetUInt32(hash_any((const unsigned char *) k->query_ptr,
560 k->query_len));
564 * Compare two keys - zero means match
566 static int
567 pgss_match_fn(const void *key1, const void *key2, Size keysize)
569 const pgssHashKey *k1 = (const pgssHashKey *) key1;
570 const pgssHashKey *k2 = (const pgssHashKey *) key2;
572 if (k1->userid == k2->userid &&
573 k1->dbid == k2->dbid &&
574 k1->encoding == k2->encoding &&
575 k1->query_len == k2->query_len &&
576 memcmp(k1->query_ptr, k2->query_ptr, k1->query_len) == 0)
577 return 0;
578 else
579 return 1;
583 * Store some statistics for a statement.
585 static void
586 pgss_store(const char *query, const Instrumentation *instr, uint32 rows)
588 pgssHashKey key;
589 double usage;
590 pgssEntry *entry;
592 Assert(query != NULL);
594 /* Safety check... */
595 if (!pgss || !pgss_hash)
596 return;
598 /* Set up key for hashtable search */
599 key.userid = GetUserId();
600 key.dbid = MyDatabaseId;
601 key.encoding = GetDatabaseEncoding();
602 key.query_len = strlen(query);
603 if (key.query_len >= pgss->query_size)
604 key.query_len = pg_encoding_mbcliplen(key.encoding,
605 query,
606 key.query_len,
607 pgss->query_size - 1);
608 key.query_ptr = query;
610 usage = USAGE_EXEC(duration);
612 /* Lookup the hash table entry with shared lock. */
613 LWLockAcquire(pgss->lock, LW_SHARED);
615 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
616 if (!entry)
618 /* Must acquire exclusive lock to add a new entry. */
619 LWLockRelease(pgss->lock);
620 LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
621 entry = entry_alloc(&key);
624 /* Grab the spinlock while updating the counters. */
626 volatile pgssEntry *e = (volatile pgssEntry *) entry;
628 SpinLockAcquire(&e->mutex);
629 e->counters.calls += 1;
630 e->counters.total_time += instr->total;
631 e->counters.rows += rows;
632 e->counters.usage += usage;
633 SpinLockRelease(&e->mutex);
636 LWLockRelease(pgss->lock);
640 * Reset all statement statistics.
642 Datum
643 pg_stat_statements_reset(PG_FUNCTION_ARGS)
645 if (!pgss || !pgss_hash)
646 ereport(ERROR,
647 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
648 errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
649 entry_reset();
650 PG_RETURN_VOID();
653 #define PG_STAT_STATEMENTS_COLS 6
656 * Retrieve statement statistics.
658 Datum
659 pg_stat_statements(PG_FUNCTION_ARGS)
661 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
662 TupleDesc tupdesc;
663 Tuplestorestate *tupstore;
664 MemoryContext per_query_ctx;
665 MemoryContext oldcontext;
666 Oid userid = GetUserId();
667 bool is_superuser = superuser();
668 HASH_SEQ_STATUS hash_seq;
669 pgssEntry *entry;
671 if (!pgss || !pgss_hash)
672 ereport(ERROR,
673 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
674 errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
676 /* check to see if caller supports us returning a tuplestore */
677 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
678 ereport(ERROR,
679 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
680 errmsg("set-valued function called in context that cannot accept a set")));
681 if (!(rsinfo->allowedModes & SFRM_Materialize))
682 ereport(ERROR,
683 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
684 errmsg("materialize mode required, but it is not " \
685 "allowed in this context")));
687 per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
688 oldcontext = MemoryContextSwitchTo(per_query_ctx);
690 tupdesc = CreateTemplateTupleDesc(PG_STAT_STATEMENTS_COLS, false);
691 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "userid",
692 OIDOID, -1, 0);
693 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "dbid",
694 OIDOID, -1, 0);
695 TupleDescInitEntry(tupdesc, (AttrNumber) 3, "query",
696 TEXTOID, -1, 0);
697 TupleDescInitEntry(tupdesc, (AttrNumber) 4, "calls",
698 INT8OID, -1, 0);
699 TupleDescInitEntry(tupdesc, (AttrNumber) 5, "total_time",
700 FLOAT8OID, -1, 0);
701 TupleDescInitEntry(tupdesc, (AttrNumber) 6, "rows",
702 INT8OID, -1, 0);
704 tupstore = tuplestore_begin_heap(true, false, work_mem);
705 rsinfo->returnMode = SFRM_Materialize;
706 rsinfo->setResult = tupstore;
707 rsinfo->setDesc = tupdesc;
709 LWLockAcquire(pgss->lock, LW_SHARED);
711 hash_seq_init(&hash_seq, pgss_hash);
712 while ((entry = hash_seq_search(&hash_seq)) != NULL)
714 Datum values[PG_STAT_STATEMENTS_COLS];
715 bool nulls[PG_STAT_STATEMENTS_COLS];
716 int i = 0;
717 Counters tmp;
719 /* generate junk in short-term context */
720 MemoryContextSwitchTo(oldcontext);
722 memset(values, 0, sizeof(values));
723 memset(nulls, 0, sizeof(nulls));
725 values[i++] = ObjectIdGetDatum(entry->key.userid);
726 values[i++] = ObjectIdGetDatum(entry->key.dbid);
728 if (is_superuser || entry->key.userid == userid)
730 char *qstr;
732 qstr = (char *)
733 pg_do_encoding_conversion((unsigned char *) entry->query,
734 entry->key.query_len,
735 entry->key.encoding,
736 GetDatabaseEncoding());
737 values[i++] = CStringGetTextDatum(qstr);
738 if (qstr != entry->query)
739 pfree(qstr);
741 else
742 values[i++] = CStringGetTextDatum("<insufficient privilege>");
744 /* copy counters to a local variable to keep locking time short */
746 volatile pgssEntry *e = (volatile pgssEntry *) entry;
748 SpinLockAcquire(&e->mutex);
749 tmp = e->counters;
750 SpinLockRelease(&e->mutex);
753 values[i++] = Int64GetDatumFast(tmp.calls);
754 values[i++] = Float8GetDatumFast(tmp.total_time);
755 values[i++] = Int64GetDatumFast(tmp.rows);
757 Assert(i == PG_STAT_STATEMENTS_COLS);
759 /* switch to appropriate context while storing the tuple */
760 MemoryContextSwitchTo(per_query_ctx);
761 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
764 LWLockRelease(pgss->lock);
766 /* clean up and return the tuplestore */
767 tuplestore_donestoring(tupstore);
769 MemoryContextSwitchTo(oldcontext);
771 return (Datum) 0;
775 * Estimate shared memory space needed.
777 static Size
778 pgss_memsize(void)
780 Size size;
781 Size entrysize;
783 size = MAXALIGN(sizeof(pgssSharedState));
784 entrysize = offsetof(pgssEntry, query) + pgstat_track_activity_query_size;
785 size = add_size(size, hash_estimate_size(pgss_max, entrysize));
787 return size;
791 * Allocate a new hashtable entry.
792 * caller must hold an exclusive lock on pgss->lock
794 * Note: despite needing exclusive lock, it's not an error for the target
795 * entry to already exist. This is because pgss_store releases and
796 * reacquires lock after failing to find a match; so someone else could
797 * have made the entry while we waited to get exclusive lock.
799 static pgssEntry *
800 entry_alloc(pgssHashKey *key)
802 pgssEntry *entry;
803 bool found;
805 /* Caller must have clipped query properly */
806 Assert(key->query_len < pgss->query_size);
808 /* Make space if needed */
809 while (hash_get_num_entries(pgss_hash) >= pgss_max)
810 entry_dealloc();
812 /* Find or create an entry with desired hash code */
813 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
815 if (!found)
817 /* New entry, initialize it */
819 /* dynahash tried to copy the key for us, but must fix query_ptr */
820 entry->key.query_ptr = entry->query;
821 /* reset the statistics */
822 memset(&entry->counters, 0, sizeof(Counters));
823 entry->counters.usage = USAGE_INIT;
824 /* re-initialize the mutex each time ... we assume no one using it */
825 SpinLockInit(&entry->mutex);
826 /* ... and don't forget the query text */
827 memcpy(entry->query, key->query_ptr, key->query_len);
828 entry->query[key->query_len] = '\0';
831 return entry;
835 * qsort comparator for sorting into increasing usage order
837 static int
838 entry_cmp(const void *lhs, const void *rhs)
840 double l_usage = (*(const pgssEntry **)lhs)->counters.usage;
841 double r_usage = (*(const pgssEntry **)rhs)->counters.usage;
843 if (l_usage < r_usage)
844 return -1;
845 else if (l_usage > r_usage)
846 return +1;
847 else
848 return 0;
852 * Deallocate least used entries.
853 * Caller must hold an exclusive lock on pgss->lock.
855 static void
856 entry_dealloc(void)
858 HASH_SEQ_STATUS hash_seq;
859 pgssEntry **entries;
860 pgssEntry *entry;
861 int nvictims;
862 int i;
864 /* Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them. */
866 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
868 i = 0;
869 hash_seq_init(&hash_seq, pgss_hash);
870 while ((entry = hash_seq_search(&hash_seq)) != NULL)
872 entries[i++] = entry;
873 entry->counters.usage *= USAGE_DECREASE_FACTOR;
876 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
877 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
878 nvictims = Min(nvictims, i);
880 for (i = 0; i < nvictims; i++)
882 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
885 pfree(entries);
889 * Release all entries.
891 static void
892 entry_reset(void)
894 HASH_SEQ_STATUS hash_seq;
895 pgssEntry *entry;
897 LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
899 hash_seq_init(&hash_seq, pgss_hash);
900 while ((entry = hash_seq_search(&hash_seq)) != NULL)
902 hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
905 LWLockRelease(pgss->lock);