Fix xslt_process() to ensure that it inserts a NULL terminator after the
[PostgreSQL.git] / contrib / pg_stat_statements / pg_stat_statements.c
blobd61d91ae738d8d29e8189d1c3075a2afc46fcc9e
/*-------------------------------------------------------------------------
 *
 * pg_stat_statements.c
 *		Track statement execution times across a whole database cluster.
 *
 * Note about locking issues: to create or delete an entry in the shared
 * hashtable, one must hold pgss->lock exclusively.  Modifying any field
 * in an entry except the counters requires the same.  To look up an entry,
 * one must hold the lock shared.  To read or update the counters within
 * an entry, one must hold the lock shared or exclusive (so the entry doesn't
 * disappear!) and also take the entry's mutex spinlock.
 *
 *
 * Copyright (c) 2008-2009, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
21 #include "postgres.h"
23 #include <unistd.h>
25 #include "access/hash.h"
26 #include "catalog/pg_type.h"
27 #include "executor/executor.h"
28 #include "executor/instrument.h"
29 #include "mb/pg_wchar.h"
30 #include "miscadmin.h"
31 #include "pgstat.h"
32 #include "storage/fd.h"
33 #include "storage/ipc.h"
34 #include "storage/spin.h"
35 #include "utils/builtins.h"
36 #include "utils/hsearch.h"
37 #include "utils/guc.h"
40 PG_MODULE_MAGIC;
42 /* Location of stats file */
43 #define PGSS_DUMP_FILE "global/pg_stat_statements.stat"
45 /* This constant defines the magic number in the stats file header */
46 static const uint32 PGSS_FILE_HEADER = 0x20081202;
48 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
49 #define USAGE_EXEC(duration) (1.0)
50 #define USAGE_INIT (1.0) /* including initial planning */
51 #define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
52 #define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
55 * Hashtable key that defines the identity of a hashtable entry. The
56 * hash comparators do not assume that the query string is null-terminated;
57 * this lets us search for an mbcliplen'd string without copying it first.
59 * Presently, the query encoding is fully determined by the source database
60 * and so we don't really need it to be in the key. But that might not always
61 * be true. Anyway it's notationally convenient to pass it as part of the key.
63 typedef struct pgssHashKey
65 Oid userid; /* user OID */
66 Oid dbid; /* database OID */
67 int encoding; /* query encoding */
68 int query_len; /* # of valid bytes in query string */
69 const char *query_ptr; /* query string proper */
70 } pgssHashKey;
73 * The actual stats counters kept within pgssEntry.
75 typedef struct Counters
77 int64 calls; /* # of times executed */
78 double total_time; /* total execution time in seconds */
79 int64 rows; /* total # of retrieved or affected rows */
80 double usage; /* usage factor */
81 } Counters;
84 * Statistics per statement
86 * NB: see the file read/write code before changing field order here.
88 typedef struct pgssEntry
90 pgssHashKey key; /* hash key of entry - MUST BE FIRST */
91 Counters counters; /* the statistics for this query */
92 slock_t mutex; /* protects the counters only */
93 char query[1]; /* VARIABLE LENGTH ARRAY - MUST BE LAST */
94 /* Note: the allocated length of query[] is actually pgss->query_size */
95 } pgssEntry;
98 * Global shared state
100 typedef struct pgssSharedState
102 LWLockId lock; /* protects hashtable search/modification */
103 int query_size; /* max query length in bytes */
104 } pgssSharedState;
106 /*---- Local variables ----*/
108 /* Current nesting depth of ExecutorRun calls */
109 static int nested_level = 0;
111 /* Saved hook values in case of unload */
112 static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
113 static ExecutorStart_hook_type prev_ExecutorStart = NULL;
114 static ExecutorRun_hook_type prev_ExecutorRun = NULL;
115 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
117 /* Links to shared memory state */
118 static pgssSharedState *pgss = NULL;
119 static HTAB *pgss_hash = NULL;
/*---- GUC variables ----*/

/* Possible settings of pg_stat_statements.track */
typedef enum
{
	PGSS_TRACK_NONE,			/* track no statements */
	PGSS_TRACK_TOP,				/* only top level statements */
	PGSS_TRACK_ALL,				/* all statements, including nested ones */
} PGSSTrackLevel;
130 static const struct config_enum_entry track_options[] = {
131 {"none", PGSS_TRACK_NONE, false},
132 {"top", PGSS_TRACK_TOP, false},
133 {"all", PGSS_TRACK_ALL, false},
134 {NULL, 0, false}
137 static int pgss_max; /* max # statements to track */
138 static int pgss_track; /* tracking level */
139 static bool pgss_save; /* whether to save stats across shutdown */
142 #define pgss_enabled() \
143 (pgss_track == PGSS_TRACK_ALL || \
144 (pgss_track == PGSS_TRACK_TOP && nested_level == 0))
146 /*---- Function declarations ----*/
148 void _PG_init(void);
149 void _PG_fini(void);
151 Datum pg_stat_statements_reset(PG_FUNCTION_ARGS);
152 Datum pg_stat_statements(PG_FUNCTION_ARGS);
154 PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
155 PG_FUNCTION_INFO_V1(pg_stat_statements);
157 static void pgss_shmem_startup(void);
158 static void pgss_shmem_shutdown(int code, Datum arg);
159 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
160 static void pgss_ExecutorRun(QueryDesc *queryDesc,
161 ScanDirection direction,
162 long count);
163 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
164 static uint32 pgss_hash_fn(const void *key, Size keysize);
165 static int pgss_match_fn(const void *key1, const void *key2, Size keysize);
166 static void pgss_store(const char *query,
167 const Instrumentation *instr, uint32 rows);
168 static Size pgss_memsize(void);
169 static pgssEntry *entry_alloc(pgssHashKey *key);
170 static void entry_dealloc(void);
171 static void entry_reset(void);
175 * Module load callback
177 void
178 _PG_init(void)
181 * In order to create our shared memory area, we have to be loaded via
182 * shared_preload_libraries. If not, fall out without hooking into any of
183 * the main system. (We don't throw error here because it seems useful to
184 * allow the pg_stat_statements functions to be created even when the
185 * module isn't active. The functions must protect themselves against
186 * being called then, however.)
188 if (!process_shared_preload_libraries_in_progress)
189 return;
192 * Define (or redefine) custom GUC variables.
194 DefineCustomIntVariable("pg_stat_statements.max",
195 "Sets the maximum number of statements tracked by pg_stat_statements.",
196 NULL,
197 &pgss_max,
198 1000,
199 100,
200 INT_MAX,
201 PGC_POSTMASTER,
203 NULL,
204 NULL);
206 DefineCustomEnumVariable("pg_stat_statements.track",
207 "Selects which statements are tracked by pg_stat_statements.",
208 NULL,
209 &pgss_track,
210 PGSS_TRACK_TOP,
211 track_options,
212 PGC_SUSET,
214 NULL,
215 NULL);
217 DefineCustomBoolVariable("pg_stat_statements.save",
218 "Save pg_stat_statements statistics across server shutdowns.",
219 NULL,
220 &pgss_save,
221 true,
222 PGC_SIGHUP,
224 NULL,
225 NULL);
227 EmitWarningsOnPlaceholders("pg_stat_statements");
230 * Request additional shared resources. (These are no-ops if we're not in
231 * the postmaster process.) We'll allocate or attach to the shared
232 * resources in pgss_shmem_startup().
234 RequestAddinShmemSpace(pgss_memsize());
235 RequestAddinLWLocks(1);
238 * Install hooks.
240 prev_shmem_startup_hook = shmem_startup_hook;
241 shmem_startup_hook = pgss_shmem_startup;
242 prev_ExecutorStart = ExecutorStart_hook;
243 ExecutorStart_hook = pgss_ExecutorStart;
244 prev_ExecutorRun = ExecutorRun_hook;
245 ExecutorRun_hook = pgss_ExecutorRun;
246 prev_ExecutorEnd = ExecutorEnd_hook;
247 ExecutorEnd_hook = pgss_ExecutorEnd;
251 * Module unload callback
253 void
254 _PG_fini(void)
256 /* Uninstall hooks. */
257 ExecutorStart_hook = prev_ExecutorStart;
258 ExecutorRun_hook = prev_ExecutorRun;
259 ExecutorEnd_hook = prev_ExecutorEnd;
260 shmem_startup_hook = prev_shmem_startup_hook;
264 * shmem_startup hook: allocate or attach to shared memory,
265 * then load any pre-existing statistics from file.
267 static void
268 pgss_shmem_startup(void)
270 bool found;
271 HASHCTL info;
272 FILE *file;
273 uint32 header;
274 int32 num;
275 int32 i;
276 int query_size;
277 int buffer_size;
278 char *buffer = NULL;
280 if (prev_shmem_startup_hook)
281 prev_shmem_startup_hook();
283 /* reset in case this is a restart within the postmaster */
284 pgss = NULL;
285 pgss_hash = NULL;
288 * Create or attach to the shared memory state, including hash table
290 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
292 pgss = ShmemInitStruct("pg_stat_statements",
293 sizeof(pgssSharedState),
294 &found);
295 if (!pgss)
296 elog(ERROR, "out of shared memory");
298 if (!found)
300 /* First time through ... */
301 pgss->lock = LWLockAssign();
302 pgss->query_size = pgstat_track_activity_query_size;
305 /* Be sure everyone agrees on the hash table entry size */
306 query_size = pgss->query_size;
308 memset(&info, 0, sizeof(info));
309 info.keysize = sizeof(pgssHashKey);
310 info.entrysize = offsetof(pgssEntry, query) +query_size;
311 info.hash = pgss_hash_fn;
312 info.match = pgss_match_fn;
313 pgss_hash = ShmemInitHash("pg_stat_statements hash",
314 pgss_max, pgss_max,
315 &info,
316 HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
317 if (!pgss_hash)
318 elog(ERROR, "out of shared memory");
320 LWLockRelease(AddinShmemInitLock);
323 * If we're in the postmaster (or a standalone backend...), set up a shmem
324 * exit hook to dump the statistics to disk.
326 if (!IsUnderPostmaster)
327 on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
330 * Attempt to load old statistics from the dump file.
332 * Note: we don't bother with locks here, because there should be no other
333 * processes running when this is called.
335 if (!pgss_save)
336 return;
338 file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
339 if (file == NULL)
341 if (errno == ENOENT)
342 return; /* ignore not-found error */
343 goto error;
346 buffer_size = query_size;
347 buffer = (char *) palloc(buffer_size);
349 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
350 header != PGSS_FILE_HEADER ||
351 fread(&num, sizeof(int32), 1, file) != 1)
352 goto error;
354 for (i = 0; i < num; i++)
356 pgssEntry temp;
357 pgssEntry *entry;
359 if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1)
360 goto error;
362 /* Encoding is the only field we can easily sanity-check */
363 if (!PG_VALID_BE_ENCODING(temp.key.encoding))
364 goto error;
366 /* Previous incarnation might have had a larger query_size */
367 if (temp.key.query_len >= buffer_size)
369 buffer = (char *) repalloc(buffer, temp.key.query_len + 1);
370 buffer_size = temp.key.query_len + 1;
373 if (fread(buffer, 1, temp.key.query_len, file) != temp.key.query_len)
374 goto error;
375 buffer[temp.key.query_len] = '\0';
377 /* Clip to available length if needed */
378 if (temp.key.query_len >= query_size)
379 temp.key.query_len = pg_encoding_mbcliplen(temp.key.encoding,
380 buffer,
381 temp.key.query_len,
382 query_size - 1);
383 temp.key.query_ptr = buffer;
385 /* make the hashtable entry (discards old entries if too many) */
386 entry = entry_alloc(&temp.key);
388 /* copy in the actual stats */
389 entry->counters = temp.counters;
392 pfree(buffer);
393 FreeFile(file);
394 return;
396 error:
397 ereport(LOG,
398 (errcode_for_file_access(),
399 errmsg("could not read pg_stat_statement file \"%s\": %m",
400 PGSS_DUMP_FILE)));
401 if (buffer)
402 pfree(buffer);
403 if (file)
404 FreeFile(file);
405 /* If possible, throw away the bogus file; ignore any error */
406 unlink(PGSS_DUMP_FILE);
410 * shmem_shutdown hook: Dump statistics into file.
412 * Note: we don't bother with acquiring lock, because there should be no
413 * other processes running when this is called.
415 static void
416 pgss_shmem_shutdown(int code, Datum arg)
418 FILE *file;
419 HASH_SEQ_STATUS hash_seq;
420 int32 num_entries;
421 pgssEntry *entry;
423 /* Don't try to dump during a crash. */
424 if (code)
425 return;
427 /* Safety check ... shouldn't get here unless shmem is set up. */
428 if (!pgss || !pgss_hash)
429 return;
431 /* Don't dump if told not to. */
432 if (!pgss_save)
433 return;
435 file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_W);
436 if (file == NULL)
437 goto error;
439 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
440 goto error;
441 num_entries = hash_get_num_entries(pgss_hash);
442 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
443 goto error;
445 hash_seq_init(&hash_seq, pgss_hash);
446 while ((entry = hash_seq_search(&hash_seq)) != NULL)
448 int len = entry->key.query_len;
450 if (fwrite(entry, offsetof(pgssEntry, mutex), 1, file) != 1 ||
451 fwrite(entry->query, 1, len, file) != len)
452 goto error;
455 if (FreeFile(file))
457 file = NULL;
458 goto error;
461 return;
463 error:
464 ereport(LOG,
465 (errcode_for_file_access(),
466 errmsg("could not write pg_stat_statement file \"%s\": %m",
467 PGSS_DUMP_FILE)));
468 if (file)
469 FreeFile(file);
470 unlink(PGSS_DUMP_FILE);
474 * ExecutorStart hook: start up tracking if needed
476 static void
477 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
479 if (prev_ExecutorStart)
480 prev_ExecutorStart(queryDesc, eflags);
481 else
482 standard_ExecutorStart(queryDesc, eflags);
484 if (pgss_enabled())
487 * Set up to track total elapsed time in ExecutorRun. Make sure the
488 * space is allocated in the per-query context so it will go away at
489 * ExecutorEnd.
491 if (queryDesc->totaltime == NULL)
493 MemoryContext oldcxt;
495 oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
496 queryDesc->totaltime = InstrAlloc(1);
497 MemoryContextSwitchTo(oldcxt);
503 * ExecutorRun hook: all we need do is track nesting depth
505 static void
506 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
508 nested_level++;
509 PG_TRY();
511 if (prev_ExecutorRun)
512 prev_ExecutorRun(queryDesc, direction, count);
513 else
514 standard_ExecutorRun(queryDesc, direction, count);
515 nested_level--;
517 PG_CATCH();
519 nested_level--;
520 PG_RE_THROW();
522 PG_END_TRY();
526 * ExecutorEnd hook: store results if needed
528 static void
529 pgss_ExecutorEnd(QueryDesc *queryDesc)
531 if (queryDesc->totaltime && pgss_enabled())
534 * Make sure stats accumulation is done. (Note: it's okay if several
535 * levels of hook all do this.)
537 InstrEndLoop(queryDesc->totaltime);
539 pgss_store(queryDesc->sourceText,
540 queryDesc->totaltime,
541 queryDesc->estate->es_processed);
544 if (prev_ExecutorEnd)
545 prev_ExecutorEnd(queryDesc);
546 else
547 standard_ExecutorEnd(queryDesc);
551 * Calculate hash value for a key
553 static uint32
554 pgss_hash_fn(const void *key, Size keysize)
556 const pgssHashKey *k = (const pgssHashKey *) key;
558 /* we don't bother to include encoding in the hash */
559 return hash_uint32((uint32) k->userid) ^
560 hash_uint32((uint32) k->dbid) ^
561 DatumGetUInt32(hash_any((const unsigned char *) k->query_ptr,
562 k->query_len));
566 * Compare two keys - zero means match
568 static int
569 pgss_match_fn(const void *key1, const void *key2, Size keysize)
571 const pgssHashKey *k1 = (const pgssHashKey *) key1;
572 const pgssHashKey *k2 = (const pgssHashKey *) key2;
574 if (k1->userid == k2->userid &&
575 k1->dbid == k2->dbid &&
576 k1->encoding == k2->encoding &&
577 k1->query_len == k2->query_len &&
578 memcmp(k1->query_ptr, k2->query_ptr, k1->query_len) == 0)
579 return 0;
580 else
581 return 1;
585 * Store some statistics for a statement.
587 static void
588 pgss_store(const char *query, const Instrumentation *instr, uint32 rows)
590 pgssHashKey key;
591 double usage;
592 pgssEntry *entry;
594 Assert(query != NULL);
596 /* Safety check... */
597 if (!pgss || !pgss_hash)
598 return;
600 /* Set up key for hashtable search */
601 key.userid = GetUserId();
602 key.dbid = MyDatabaseId;
603 key.encoding = GetDatabaseEncoding();
604 key.query_len = strlen(query);
605 if (key.query_len >= pgss->query_size)
606 key.query_len = pg_encoding_mbcliplen(key.encoding,
607 query,
608 key.query_len,
609 pgss->query_size - 1);
610 key.query_ptr = query;
612 usage = USAGE_EXEC(duration);
614 /* Lookup the hash table entry with shared lock. */
615 LWLockAcquire(pgss->lock, LW_SHARED);
617 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
618 if (!entry)
620 /* Must acquire exclusive lock to add a new entry. */
621 LWLockRelease(pgss->lock);
622 LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
623 entry = entry_alloc(&key);
626 /* Grab the spinlock while updating the counters. */
628 volatile pgssEntry *e = (volatile pgssEntry *) entry;
630 SpinLockAcquire(&e->mutex);
631 e->counters.calls += 1;
632 e->counters.total_time += instr->total;
633 e->counters.rows += rows;
634 e->counters.usage += usage;
635 SpinLockRelease(&e->mutex);
638 LWLockRelease(pgss->lock);
642 * Reset all statement statistics.
644 Datum
645 pg_stat_statements_reset(PG_FUNCTION_ARGS)
647 if (!pgss || !pgss_hash)
648 ereport(ERROR,
649 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
650 errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
651 entry_reset();
652 PG_RETURN_VOID();
655 #define PG_STAT_STATEMENTS_COLS 6
658 * Retrieve statement statistics.
660 Datum
661 pg_stat_statements(PG_FUNCTION_ARGS)
663 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
664 TupleDesc tupdesc;
665 Tuplestorestate *tupstore;
666 MemoryContext per_query_ctx;
667 MemoryContext oldcontext;
668 Oid userid = GetUserId();
669 bool is_superuser = superuser();
670 HASH_SEQ_STATUS hash_seq;
671 pgssEntry *entry;
673 if (!pgss || !pgss_hash)
674 ereport(ERROR,
675 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
676 errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
678 /* check to see if caller supports us returning a tuplestore */
679 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
680 ereport(ERROR,
681 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
682 errmsg("set-valued function called in context that cannot accept a set")));
683 if (!(rsinfo->allowedModes & SFRM_Materialize))
684 ereport(ERROR,
685 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
686 errmsg("materialize mode required, but it is not " \
687 "allowed in this context")));
689 per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
690 oldcontext = MemoryContextSwitchTo(per_query_ctx);
692 tupdesc = CreateTemplateTupleDesc(PG_STAT_STATEMENTS_COLS, false);
693 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "userid",
694 OIDOID, -1, 0);
695 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "dbid",
696 OIDOID, -1, 0);
697 TupleDescInitEntry(tupdesc, (AttrNumber) 3, "query",
698 TEXTOID, -1, 0);
699 TupleDescInitEntry(tupdesc, (AttrNumber) 4, "calls",
700 INT8OID, -1, 0);
701 TupleDescInitEntry(tupdesc, (AttrNumber) 5, "total_time",
702 FLOAT8OID, -1, 0);
703 TupleDescInitEntry(tupdesc, (AttrNumber) 6, "rows",
704 INT8OID, -1, 0);
706 tupstore = tuplestore_begin_heap(true, false, work_mem);
707 rsinfo->returnMode = SFRM_Materialize;
708 rsinfo->setResult = tupstore;
709 rsinfo->setDesc = tupdesc;
711 LWLockAcquire(pgss->lock, LW_SHARED);
713 hash_seq_init(&hash_seq, pgss_hash);
714 while ((entry = hash_seq_search(&hash_seq)) != NULL)
716 Datum values[PG_STAT_STATEMENTS_COLS];
717 bool nulls[PG_STAT_STATEMENTS_COLS];
718 int i = 0;
719 Counters tmp;
721 /* generate junk in short-term context */
722 MemoryContextSwitchTo(oldcontext);
724 memset(values, 0, sizeof(values));
725 memset(nulls, 0, sizeof(nulls));
727 values[i++] = ObjectIdGetDatum(entry->key.userid);
728 values[i++] = ObjectIdGetDatum(entry->key.dbid);
730 if (is_superuser || entry->key.userid == userid)
732 char *qstr;
734 qstr = (char *)
735 pg_do_encoding_conversion((unsigned char *) entry->query,
736 entry->key.query_len,
737 entry->key.encoding,
738 GetDatabaseEncoding());
739 values[i++] = CStringGetTextDatum(qstr);
740 if (qstr != entry->query)
741 pfree(qstr);
743 else
744 values[i++] = CStringGetTextDatum("<insufficient privilege>");
746 /* copy counters to a local variable to keep locking time short */
748 volatile pgssEntry *e = (volatile pgssEntry *) entry;
750 SpinLockAcquire(&e->mutex);
751 tmp = e->counters;
752 SpinLockRelease(&e->mutex);
755 values[i++] = Int64GetDatumFast(tmp.calls);
756 values[i++] = Float8GetDatumFast(tmp.total_time);
757 values[i++] = Int64GetDatumFast(tmp.rows);
759 Assert(i == PG_STAT_STATEMENTS_COLS);
761 /* switch to appropriate context while storing the tuple */
762 MemoryContextSwitchTo(per_query_ctx);
763 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
766 LWLockRelease(pgss->lock);
768 /* clean up and return the tuplestore */
769 tuplestore_donestoring(tupstore);
771 MemoryContextSwitchTo(oldcontext);
773 return (Datum) 0;
777 * Estimate shared memory space needed.
779 static Size
780 pgss_memsize(void)
782 Size size;
783 Size entrysize;
785 size = MAXALIGN(sizeof(pgssSharedState));
786 entrysize = offsetof(pgssEntry, query) +pgstat_track_activity_query_size;
787 size = add_size(size, hash_estimate_size(pgss_max, entrysize));
789 return size;
793 * Allocate a new hashtable entry.
794 * caller must hold an exclusive lock on pgss->lock
796 * Note: despite needing exclusive lock, it's not an error for the target
797 * entry to already exist. This is because pgss_store releases and
798 * reacquires lock after failing to find a match; so someone else could
799 * have made the entry while we waited to get exclusive lock.
801 static pgssEntry *
802 entry_alloc(pgssHashKey *key)
804 pgssEntry *entry;
805 bool found;
807 /* Caller must have clipped query properly */
808 Assert(key->query_len < pgss->query_size);
810 /* Make space if needed */
811 while (hash_get_num_entries(pgss_hash) >= pgss_max)
812 entry_dealloc();
814 /* Find or create an entry with desired hash code */
815 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
817 if (!found)
819 /* New entry, initialize it */
821 /* dynahash tried to copy the key for us, but must fix query_ptr */
822 entry->key.query_ptr = entry->query;
823 /* reset the statistics */
824 memset(&entry->counters, 0, sizeof(Counters));
825 entry->counters.usage = USAGE_INIT;
826 /* re-initialize the mutex each time ... we assume no one using it */
827 SpinLockInit(&entry->mutex);
828 /* ... and don't forget the query text */
829 memcpy(entry->query, key->query_ptr, key->query_len);
830 entry->query[key->query_len] = '\0';
833 return entry;
837 * qsort comparator for sorting into increasing usage order
839 static int
840 entry_cmp(const void *lhs, const void *rhs)
842 double l_usage = (*(const pgssEntry **) lhs)->counters.usage;
843 double r_usage = (*(const pgssEntry **) rhs)->counters.usage;
845 if (l_usage < r_usage)
846 return -1;
847 else if (l_usage > r_usage)
848 return +1;
849 else
850 return 0;
854 * Deallocate least used entries.
855 * Caller must hold an exclusive lock on pgss->lock.
857 static void
858 entry_dealloc(void)
860 HASH_SEQ_STATUS hash_seq;
861 pgssEntry **entries;
862 pgssEntry *entry;
863 int nvictims;
864 int i;
866 /* Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them. */
868 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
870 i = 0;
871 hash_seq_init(&hash_seq, pgss_hash);
872 while ((entry = hash_seq_search(&hash_seq)) != NULL)
874 entries[i++] = entry;
875 entry->counters.usage *= USAGE_DECREASE_FACTOR;
878 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
879 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
880 nvictims = Min(nvictims, i);
882 for (i = 0; i < nvictims; i++)
884 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
887 pfree(entries);
891 * Release all entries.
893 static void
894 entry_reset(void)
896 HASH_SEQ_STATUS hash_seq;
897 pgssEntry *entry;
899 LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
901 hash_seq_init(&hash_seq, pgss_hash);
902 while ((entry = hash_seq_search(&hash_seq)) != NULL)
904 hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
907 LWLockRelease(pgss->lock);