1 /*-------------------------------------------------------------------------
4 * the Postgres statistics generator
6 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/commands/analyze.c
13 *-------------------------------------------------------------------------
19 #include "access/detoast.h"
20 #include "access/genam.h"
21 #include "access/multixact.h"
22 #include "access/relation.h"
23 #include "access/table.h"
24 #include "access/tableam.h"
25 #include "access/transam.h"
26 #include "access/tupconvert.h"
27 #include "access/visibilitymap.h"
28 #include "access/xact.h"
29 #include "catalog/index.h"
30 #include "catalog/indexing.h"
31 #include "catalog/pg_inherits.h"
32 #include "commands/dbcommands.h"
33 #include "commands/progress.h"
34 #include "commands/tablecmds.h"
35 #include "commands/vacuum.h"
36 #include "common/pg_prng.h"
37 #include "executor/executor.h"
38 #include "foreign/fdwapi.h"
39 #include "miscadmin.h"
40 #include "nodes/nodeFuncs.h"
41 #include "parser/parse_oper.h"
42 #include "parser/parse_relation.h"
44 #include "postmaster/autovacuum.h"
45 #include "statistics/extended_stats_internal.h"
46 #include "statistics/statistics.h"
47 #include "storage/bufmgr.h"
48 #include "storage/procarray.h"
49 #include "utils/attoptcache.h"
50 #include "utils/datum.h"
51 #include "utils/guc.h"
52 #include "utils/lsyscache.h"
53 #include "utils/memutils.h"
54 #include "utils/pg_rusage.h"
55 #include "utils/sampling.h"
56 #include "utils/sortsupport.h"
57 #include "utils/spccache.h"
58 #include "utils/syscache.h"
59 #include "utils/timestamp.h"
62 /* Per-index data for ANALYZE */
63 typedef struct AnlIndexData
65 IndexInfo
*indexInfo
; /* BuildIndexInfo result */
66 double tupleFract
; /* fraction of rows for partial index */
67 VacAttrStats
**vacattrstats
; /* index attrs to analyze */
72 /* Default statistics target (GUC parameter) */
73 int default_statistics_target
= 100;
75 /* A few variables that don't seem worth passing around as parameters */
76 static MemoryContext anl_context
= NULL
;
77 static BufferAccessStrategy vac_strategy
;
80 static void do_analyze_rel(Relation onerel
,
81 VacuumParams
*params
, List
*va_cols
,
82 AcquireSampleRowsFunc acquirefunc
, BlockNumber relpages
,
83 bool inh
, bool in_outer_xact
, int elevel
);
84 static void compute_index_stats(Relation onerel
, double totalrows
,
85 AnlIndexData
*indexdata
, int nindexes
,
86 HeapTuple
*rows
, int numrows
,
87 MemoryContext col_context
);
88 static VacAttrStats
*examine_attribute(Relation onerel
, int attnum
,
90 static int acquire_sample_rows(Relation onerel
, int elevel
,
91 HeapTuple
*rows
, int targrows
,
92 double *totalrows
, double *totaldeadrows
);
93 static int compare_rows(const void *a
, const void *b
, void *arg
);
94 static int acquire_inherited_sample_rows(Relation onerel
, int elevel
,
95 HeapTuple
*rows
, int targrows
,
96 double *totalrows
, double *totaldeadrows
);
97 static void update_attstats(Oid relid
, bool inh
,
98 int natts
, VacAttrStats
**vacattrstats
);
99 static Datum
std_fetch_func(VacAttrStatsP stats
, int rownum
, bool *isNull
);
100 static Datum
ind_fetch_func(VacAttrStatsP stats
, int rownum
, bool *isNull
);
104 * analyze_rel() -- analyze one relation
106 * relid identifies the relation to analyze. If relation is supplied, use
107 * the name therein for reporting any failure to open/lock the rel; do not
108 * use it once we've successfully opened the rel, since it might be stale.
111 analyze_rel(Oid relid
, RangeVar
*relation
,
112 VacuumParams
*params
, List
*va_cols
, bool in_outer_xact
,
113 BufferAccessStrategy bstrategy
)
117 AcquireSampleRowsFunc acquirefunc
= NULL
;
118 BlockNumber relpages
= 0;
120 /* Select logging level */
121 if (params
->options
& VACOPT_VERBOSE
)
126 /* Set up static variables */
127 vac_strategy
= bstrategy
;
130 * Check for user-requested abort.
132 CHECK_FOR_INTERRUPTS();
135 * Open the relation, getting ShareUpdateExclusiveLock to ensure that two
136 * ANALYZEs don't run on it concurrently. (This also locks out a
137 * concurrent VACUUM, which doesn't matter much at the moment but might
138 * matter if we ever try to accumulate stats on dead tuples.) If the rel
139 * has been dropped since we last saw it, we don't need to process it.
141 * Make sure to generate only logs for ANALYZE in this case.
143 onerel
= vacuum_open_relation(relid
, relation
, params
->options
& ~(VACOPT_VACUUM
),
144 params
->log_min_duration
>= 0,
145 ShareUpdateExclusiveLock
);
147 /* leave if relation could not be opened or locked */
152 * Check if relation needs to be skipped based on privileges. This check
153 * happens also when building the relation list to analyze for a manual
154 * operation, and needs to be done additionally here as ANALYZE could
155 * happen across multiple transactions where privileges could have changed
156 * in-between. Make sure to generate only logs for ANALYZE in this case.
158 if (!vacuum_is_permitted_for_relation(RelationGetRelid(onerel
),
160 params
->options
& ~VACOPT_VACUUM
))
162 relation_close(onerel
, ShareUpdateExclusiveLock
);
167 * Silently ignore tables that are temp tables of other backends ---
168 * trying to analyze these is rather pointless, since their contents are
169 * probably not up-to-date on disk. (We don't throw a warning here; it
170 * would just lead to chatter during a database-wide ANALYZE.)
172 if (RELATION_IS_OTHER_TEMP(onerel
))
174 relation_close(onerel
, ShareUpdateExclusiveLock
);
179 * We can ANALYZE any table except pg_statistic. See update_attstats
181 if (RelationGetRelid(onerel
) == StatisticRelationId
)
183 relation_close(onerel
, ShareUpdateExclusiveLock
);
188 * Check that it's of an analyzable relkind, and set up appropriately.
190 if (onerel
->rd_rel
->relkind
== RELKIND_RELATION
||
191 onerel
->rd_rel
->relkind
== RELKIND_MATVIEW
)
193 /* Regular table, so we'll use the regular row acquisition function */
194 acquirefunc
= acquire_sample_rows
;
195 /* Also get regular table's size */
196 relpages
= RelationGetNumberOfBlocks(onerel
);
198 else if (onerel
->rd_rel
->relkind
== RELKIND_FOREIGN_TABLE
)
201 * For a foreign table, call the FDW's hook function to see whether it
204 FdwRoutine
*fdwroutine
;
207 fdwroutine
= GetFdwRoutineForRelation(onerel
, false);
209 if (fdwroutine
->AnalyzeForeignTable
!= NULL
)
210 ok
= fdwroutine
->AnalyzeForeignTable(onerel
,
217 (errmsg("skipping \"%s\" --- cannot analyze this foreign table",
218 RelationGetRelationName(onerel
))));
219 relation_close(onerel
, ShareUpdateExclusiveLock
);
223 else if (onerel
->rd_rel
->relkind
== RELKIND_PARTITIONED_TABLE
)
226 * For partitioned tables, we want to do the recursive ANALYZE below.
231 /* No need for a WARNING if we already complained during VACUUM */
232 if (!(params
->options
& VACOPT_VACUUM
))
234 (errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
235 RelationGetRelationName(onerel
))));
236 relation_close(onerel
, ShareUpdateExclusiveLock
);
241 * OK, let's do it. First, initialize progress reporting.
243 pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE
,
244 RelationGetRelid(onerel
));
247 * Do the normal non-recursive ANALYZE. We can skip this for partitioned
248 * tables, which don't contain any rows.
250 if (onerel
->rd_rel
->relkind
!= RELKIND_PARTITIONED_TABLE
)
251 do_analyze_rel(onerel
, params
, va_cols
, acquirefunc
,
252 relpages
, false, in_outer_xact
, elevel
);
255 * If there are child tables, do recursive ANALYZE.
257 if (onerel
->rd_rel
->relhassubclass
)
258 do_analyze_rel(onerel
, params
, va_cols
, acquirefunc
, relpages
,
259 true, in_outer_xact
, elevel
);
262 * Close source relation now, but keep lock so that no one deletes it
263 * before we commit. (If someone did, they'd fail to clean up the entries
264 * we made in pg_statistic. Also, releasing the lock before commit would
265 * expose us to concurrent-update failures in update_attstats.)
267 relation_close(onerel
, NoLock
);
269 pgstat_progress_end_command();
273 * do_analyze_rel() -- analyze one relation, recursively or not
275 * Note that "acquirefunc" is only relevant for the non-inherited case.
276 * For the inherited case, acquire_inherited_sample_rows() determines the
277 * appropriate acquirefunc for each child table.
280 do_analyze_rel(Relation onerel
, VacuumParams
*params
,
281 List
*va_cols
, AcquireSampleRowsFunc acquirefunc
,
282 BlockNumber relpages
, bool inh
, bool in_outer_xact
,
292 VacAttrStats
**vacattrstats
;
293 AnlIndexData
*indexdata
;
301 TimestampTz starttime
= 0;
302 MemoryContext caller_context
;
304 int save_sec_context
;
306 int64 AnalyzePageHit
= VacuumPageHit
;
307 int64 AnalyzePageMiss
= VacuumPageMiss
;
308 int64 AnalyzePageDirty
= VacuumPageDirty
;
309 PgStat_Counter startreadtime
= 0;
310 PgStat_Counter startwritetime
= 0;
314 (errmsg("analyzing \"%s.%s\" inheritance tree",
315 get_namespace_name(RelationGetNamespace(onerel
)),
316 RelationGetRelationName(onerel
))));
319 (errmsg("analyzing \"%s.%s\"",
320 get_namespace_name(RelationGetNamespace(onerel
)),
321 RelationGetRelationName(onerel
))));
324 * Set up a working context so that we can easily free whatever junk gets
327 anl_context
= AllocSetContextCreate(CurrentMemoryContext
,
329 ALLOCSET_DEFAULT_SIZES
);
330 caller_context
= MemoryContextSwitchTo(anl_context
);
333 * Switch to the table owner's userid, so that any index functions are run
334 * as that user. Also lock down security-restricted operations and
335 * arrange to make GUC variable changes local to this command.
337 GetUserIdAndSecContext(&save_userid
, &save_sec_context
);
338 SetUserIdAndSecContext(onerel
->rd_rel
->relowner
,
339 save_sec_context
| SECURITY_RESTRICTED_OPERATION
);
340 save_nestlevel
= NewGUCNestLevel();
341 RestrictSearchPath();
343 /* measure elapsed time iff autovacuum logging requires it */
344 if (AmAutoVacuumWorkerProcess() && params
->log_min_duration
>= 0)
348 startreadtime
= pgStatBlockReadTime
;
349 startwritetime
= pgStatBlockWriteTime
;
352 pg_rusage_init(&ru0
);
353 starttime
= GetCurrentTimestamp();
357 * Determine which columns to analyze
359 * Note that system attributes are never analyzed, so we just reject them
360 * at the lookup stage. We also reject duplicate column mentions. (We
361 * could alternatively ignore duplicates, but analyzing a column twice
362 * won't work; we'd end up making a conflicting update in pg_statistic.)
366 Bitmapset
*unique_cols
= NULL
;
369 vacattrstats
= (VacAttrStats
**) palloc(list_length(va_cols
) *
370 sizeof(VacAttrStats
*));
374 char *col
= strVal(lfirst(le
));
376 i
= attnameAttNum(onerel
, col
, false);
377 if (i
== InvalidAttrNumber
)
379 (errcode(ERRCODE_UNDEFINED_COLUMN
),
380 errmsg("column \"%s\" of relation \"%s\" does not exist",
381 col
, RelationGetRelationName(onerel
))));
382 if (bms_is_member(i
, unique_cols
))
384 (errcode(ERRCODE_DUPLICATE_COLUMN
),
385 errmsg("column \"%s\" of relation \"%s\" appears more than once",
386 col
, RelationGetRelationName(onerel
))));
387 unique_cols
= bms_add_member(unique_cols
, i
);
389 vacattrstats
[tcnt
] = examine_attribute(onerel
, i
, NULL
);
390 if (vacattrstats
[tcnt
] != NULL
)
397 attr_cnt
= onerel
->rd_att
->natts
;
398 vacattrstats
= (VacAttrStats
**)
399 palloc(attr_cnt
* sizeof(VacAttrStats
*));
401 for (i
= 1; i
<= attr_cnt
; i
++)
403 vacattrstats
[tcnt
] = examine_attribute(onerel
, i
, NULL
);
404 if (vacattrstats
[tcnt
] != NULL
)
411 * Open all indexes of the relation, and see if there are any analyzable
412 * columns in the indexes. We do not analyze index columns if there was
413 * an explicit column list in the ANALYZE command, however.
415 * If we are doing a recursive scan, we don't want to touch the parent's
416 * indexes at all. If we're processing a partitioned table, we need to
417 * know if there are any indexes, but we don't want to process them.
419 if (onerel
->rd_rel
->relkind
== RELKIND_PARTITIONED_TABLE
)
421 List
*idxs
= RelationGetIndexList(onerel
);
425 hasindex
= idxs
!= NIL
;
430 vac_open_indexes(onerel
, AccessShareLock
, &nindexes
, &Irel
);
431 hasindex
= nindexes
> 0;
442 indexdata
= (AnlIndexData
*) palloc0(nindexes
* sizeof(AnlIndexData
));
443 for (ind
= 0; ind
< nindexes
; ind
++)
445 AnlIndexData
*thisdata
= &indexdata
[ind
];
446 IndexInfo
*indexInfo
;
448 thisdata
->indexInfo
= indexInfo
= BuildIndexInfo(Irel
[ind
]);
449 thisdata
->tupleFract
= 1.0; /* fix later if partial */
450 if (indexInfo
->ii_Expressions
!= NIL
&& va_cols
== NIL
)
452 ListCell
*indexpr_item
= list_head(indexInfo
->ii_Expressions
);
454 thisdata
->vacattrstats
= (VacAttrStats
**)
455 palloc(indexInfo
->ii_NumIndexAttrs
* sizeof(VacAttrStats
*));
457 for (i
= 0; i
< indexInfo
->ii_NumIndexAttrs
; i
++)
459 int keycol
= indexInfo
->ii_IndexAttrNumbers
[i
];
463 /* Found an index expression */
466 if (indexpr_item
== NULL
) /* shouldn't happen */
467 elog(ERROR
, "too few entries in indexprs list");
468 indexkey
= (Node
*) lfirst(indexpr_item
);
469 indexpr_item
= lnext(indexInfo
->ii_Expressions
,
471 thisdata
->vacattrstats
[tcnt
] =
472 examine_attribute(Irel
[ind
], i
+ 1, indexkey
);
473 if (thisdata
->vacattrstats
[tcnt
] != NULL
)
477 thisdata
->attr_cnt
= tcnt
;
483 * Determine how many rows we need to sample, using the worst case from
484 * all analyzable columns. We use a lower bound of 100 rows to avoid
485 * possible overflow in Vitter's algorithm. (Note: that will also be the
486 * target in the corner case where there are no analyzable columns.)
489 for (i
= 0; i
< attr_cnt
; i
++)
491 if (targrows
< vacattrstats
[i
]->minrows
)
492 targrows
= vacattrstats
[i
]->minrows
;
494 for (ind
= 0; ind
< nindexes
; ind
++)
496 AnlIndexData
*thisdata
= &indexdata
[ind
];
498 for (i
= 0; i
< thisdata
->attr_cnt
; i
++)
500 if (targrows
< thisdata
->vacattrstats
[i
]->minrows
)
501 targrows
= thisdata
->vacattrstats
[i
]->minrows
;
506 * Look at extended statistics objects too, as those may define custom
507 * statistics target. So we may need to sample more rows and then build
508 * the statistics with enough detail.
510 minrows
= ComputeExtStatisticsRows(onerel
, attr_cnt
, vacattrstats
);
512 if (targrows
< minrows
)
516 * Acquire the sample rows
518 rows
= (HeapTuple
*) palloc(targrows
* sizeof(HeapTuple
));
519 pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE
,
520 inh
? PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH
:
521 PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS
);
523 numrows
= acquire_inherited_sample_rows(onerel
, elevel
,
525 &totalrows
, &totaldeadrows
);
527 numrows
= (*acquirefunc
) (onerel
, elevel
,
529 &totalrows
, &totaldeadrows
);
532 * Compute the statistics. Temporary results during the calculations for
533 * each column are stored in a child context. The calc routines are
534 * responsible to make sure that whatever they store into the VacAttrStats
535 * structure is allocated in anl_context.
539 MemoryContext col_context
,
542 pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE
,
543 PROGRESS_ANALYZE_PHASE_COMPUTE_STATS
);
545 col_context
= AllocSetContextCreate(anl_context
,
547 ALLOCSET_DEFAULT_SIZES
);
548 old_context
= MemoryContextSwitchTo(col_context
);
550 for (i
= 0; i
< attr_cnt
; i
++)
552 VacAttrStats
*stats
= vacattrstats
[i
];
556 stats
->tupDesc
= onerel
->rd_att
;
557 stats
->compute_stats(stats
,
563 * If the appropriate flavor of the n_distinct option is
564 * specified, override with the corresponding value.
566 aopt
= get_attribute_options(onerel
->rd_id
, stats
->tupattnum
);
571 n_distinct
= inh
? aopt
->n_distinct_inherited
: aopt
->n_distinct
;
572 if (n_distinct
!= 0.0)
573 stats
->stadistinct
= n_distinct
;
576 MemoryContextReset(col_context
);
580 compute_index_stats(onerel
, totalrows
,
585 MemoryContextSwitchTo(old_context
);
586 MemoryContextDelete(col_context
);
589 * Emit the completed stats rows into pg_statistic, replacing any
590 * previous statistics for the target columns. (If there are stats in
591 * pg_statistic for columns we didn't process, we leave them alone.)
593 update_attstats(RelationGetRelid(onerel
), inh
,
594 attr_cnt
, vacattrstats
);
596 for (ind
= 0; ind
< nindexes
; ind
++)
598 AnlIndexData
*thisdata
= &indexdata
[ind
];
600 update_attstats(RelationGetRelid(Irel
[ind
]), false,
601 thisdata
->attr_cnt
, thisdata
->vacattrstats
);
604 /* Build extended statistics (if there are any). */
605 BuildRelationExtStatistics(onerel
, inh
, totalrows
, numrows
, rows
,
606 attr_cnt
, vacattrstats
);
609 pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE
,
610 PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE
);
613 * Update pages/tuples stats in pg_class ... but not if we're doing
616 * We assume that VACUUM hasn't set pg_class.reltuples already, even
617 * during a VACUUM ANALYZE. Although VACUUM often updates pg_class,
618 * exceptions exist. A "VACUUM (ANALYZE, INDEX_CLEANUP OFF)" command will
619 * never update pg_class entries for index relations. It's also possible
620 * that an individual index's pg_class entry won't be updated during
621 * VACUUM if the index AM returns NULL from its amvacuumcleanup() routine.
625 BlockNumber relallvisible
;
627 if (RELKIND_HAS_STORAGE(onerel
->rd_rel
->relkind
))
628 visibilitymap_count(onerel
, &relallvisible
, NULL
);
632 /* Update pg_class for table relation */
633 vac_update_relstats(onerel
,
638 InvalidTransactionId
,
643 /* Same for indexes */
644 for (ind
= 0; ind
< nindexes
; ind
++)
646 AnlIndexData
*thisdata
= &indexdata
[ind
];
647 double totalindexrows
;
649 totalindexrows
= ceil(thisdata
->tupleFract
* totalrows
);
650 vac_update_relstats(Irel
[ind
],
651 RelationGetNumberOfBlocks(Irel
[ind
]),
655 InvalidTransactionId
,
661 else if (onerel
->rd_rel
->relkind
== RELKIND_PARTITIONED_TABLE
)
664 * Partitioned tables don't have storage, so we don't set any fields
665 * in their pg_class entries except for reltuples and relhasindex.
667 vac_update_relstats(onerel
, -1, totalrows
,
668 0, hasindex
, InvalidTransactionId
,
675 * Now report ANALYZE to the cumulative stats system. For regular tables,
676 * we do it only if not doing inherited stats. For partitioned tables, we
677 * only do it for inherited stats. (We're never called for not-inherited
678 * stats on partitioned tables anyway.)
680 * Reset the changes_since_analyze counter only if we analyzed all
681 * columns; otherwise, there is still work for auto-analyze to do.
684 pgstat_report_analyze(onerel
, totalrows
, totaldeadrows
,
686 else if (onerel
->rd_rel
->relkind
== RELKIND_PARTITIONED_TABLE
)
687 pgstat_report_analyze(onerel
, 0, 0, (va_cols
== NIL
));
690 * If this isn't part of VACUUM ANALYZE, let index AMs do cleanup.
692 * Note that most index AMs perform a no-op as a matter of policy for
693 * amvacuumcleanup() when called in ANALYZE-only mode. The only exception
694 * among core index AMs is GIN/ginvacuumcleanup().
696 if (!(params
->options
& VACOPT_VACUUM
))
698 for (ind
= 0; ind
< nindexes
; ind
++)
700 IndexBulkDeleteResult
*stats
;
701 IndexVacuumInfo ivinfo
;
703 ivinfo
.index
= Irel
[ind
];
704 ivinfo
.heaprel
= onerel
;
705 ivinfo
.analyze_only
= true;
706 ivinfo
.estimated_count
= true;
707 ivinfo
.message_level
= elevel
;
708 ivinfo
.num_heap_tuples
= onerel
->rd_rel
->reltuples
;
709 ivinfo
.strategy
= vac_strategy
;
711 stats
= index_vacuum_cleanup(&ivinfo
, NULL
);
718 /* Done with indexes */
719 vac_close_indexes(nindexes
, Irel
, NoLock
);
721 /* Log the action if appropriate */
722 if (AmAutoVacuumWorkerProcess() && params
->log_min_duration
>= 0)
724 TimestampTz endtime
= GetCurrentTimestamp();
726 if (params
->log_min_duration
== 0 ||
727 TimestampDifferenceExceeds(starttime
, endtime
,
728 params
->log_min_duration
))
731 double read_rate
= 0;
732 double write_rate
= 0;
736 * Calculate the difference in the Page Hit/Miss/Dirty that
737 * happened as part of the analyze by subtracting out the
738 * pre-analyze values which we saved above.
740 AnalyzePageHit
= VacuumPageHit
- AnalyzePageHit
;
741 AnalyzePageMiss
= VacuumPageMiss
- AnalyzePageMiss
;
742 AnalyzePageDirty
= VacuumPageDirty
- AnalyzePageDirty
;
745 * We do not expect an analyze to take > 25 days and it simplifies
746 * things a bit to use TimestampDifferenceMilliseconds.
748 delay_in_ms
= TimestampDifferenceMilliseconds(starttime
, endtime
);
751 * Note that we are reporting these read/write rates in the same
752 * manner as VACUUM does, which means that while the 'average read
753 * rate' here actually corresponds to page misses and resulting
754 * reads which are also picked up by track_io_timing, if enabled,
755 * the 'average write rate' is actually talking about the rate of
756 * pages being dirtied, not being written out, so it's typical to
757 * have a non-zero 'avg write rate' while I/O timings only reports
760 * It's not clear that an ANALYZE will ever result in
761 * FlushBuffer() being called, but we track and support reporting
762 * on I/O write time in case that changes as it's practically free
768 read_rate
= (double) BLCKSZ
* AnalyzePageMiss
/ (1024 * 1024) /
769 (delay_in_ms
/ 1000.0);
770 write_rate
= (double) BLCKSZ
* AnalyzePageDirty
/ (1024 * 1024) /
771 (delay_in_ms
/ 1000.0);
775 * We split this up so we don't emit empty I/O timing values when
776 * track_io_timing isn't enabled.
779 initStringInfo(&buf
);
780 appendStringInfo(&buf
, _("automatic analyze of table \"%s.%s.%s\"\n"),
781 get_database_name(MyDatabaseId
),
782 get_namespace_name(RelationGetNamespace(onerel
)),
783 RelationGetRelationName(onerel
));
786 double read_ms
= (double) (pgStatBlockReadTime
- startreadtime
) / 1000;
787 double write_ms
= (double) (pgStatBlockWriteTime
- startwritetime
) / 1000;
789 appendStringInfo(&buf
, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
792 appendStringInfo(&buf
, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
793 read_rate
, write_rate
);
794 appendStringInfo(&buf
, _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
795 (long long) AnalyzePageHit
,
796 (long long) AnalyzePageMiss
,
797 (long long) AnalyzePageDirty
);
798 appendStringInfo(&buf
, _("system usage: %s"), pg_rusage_show(&ru0
));
801 (errmsg_internal("%s", buf
.data
)));
807 /* Roll back any GUC changes executed by index functions */
808 AtEOXact_GUC(false, save_nestlevel
);
810 /* Restore userid and security context */
811 SetUserIdAndSecContext(save_userid
, save_sec_context
);
813 /* Restore current context and release memory */
814 MemoryContextSwitchTo(caller_context
);
815 MemoryContextDelete(anl_context
);
820 * Compute statistics about indexes of a relation
823 compute_index_stats(Relation onerel
, double totalrows
,
824 AnlIndexData
*indexdata
, int nindexes
,
825 HeapTuple
*rows
, int numrows
,
826 MemoryContext col_context
)
828 MemoryContext ind_context
,
830 Datum values
[INDEX_MAX_KEYS
];
831 bool isnull
[INDEX_MAX_KEYS
];
835 ind_context
= AllocSetContextCreate(anl_context
,
837 ALLOCSET_DEFAULT_SIZES
);
838 old_context
= MemoryContextSwitchTo(ind_context
);
840 for (ind
= 0; ind
< nindexes
; ind
++)
842 AnlIndexData
*thisdata
= &indexdata
[ind
];
843 IndexInfo
*indexInfo
= thisdata
->indexInfo
;
844 int attr_cnt
= thisdata
->attr_cnt
;
845 TupleTableSlot
*slot
;
847 ExprContext
*econtext
;
848 ExprState
*predicate
;
854 double totalindexrows
;
856 /* Ignore index if no columns to analyze and not partial */
857 if (attr_cnt
== 0 && indexInfo
->ii_Predicate
== NIL
)
861 * Need an EState for evaluation of index expressions and
862 * partial-index predicates. Create it in the per-index context to be
863 * sure it gets cleaned up at the bottom of the loop.
865 estate
= CreateExecutorState();
866 econtext
= GetPerTupleExprContext(estate
);
867 /* Need a slot to hold the current heap tuple, too */
868 slot
= MakeSingleTupleTableSlot(RelationGetDescr(onerel
),
871 /* Arrange for econtext's scan tuple to be the tuple under test */
872 econtext
->ecxt_scantuple
= slot
;
874 /* Set up execution state for predicate. */
875 predicate
= ExecPrepareQual(indexInfo
->ii_Predicate
, estate
);
877 /* Compute and save index expression values */
878 exprvals
= (Datum
*) palloc(numrows
* attr_cnt
* sizeof(Datum
));
879 exprnulls
= (bool *) palloc(numrows
* attr_cnt
* sizeof(bool));
882 for (rowno
= 0; rowno
< numrows
; rowno
++)
884 HeapTuple heapTuple
= rows
[rowno
];
886 vacuum_delay_point();
889 * Reset the per-tuple context each time, to reclaim any cruft
890 * left behind by evaluating the predicate or index expressions.
892 ResetExprContext(econtext
);
894 /* Set up for predicate or expression evaluation */
895 ExecStoreHeapTuple(heapTuple
, slot
, false);
897 /* If index is partial, check predicate */
898 if (predicate
!= NULL
)
900 if (!ExecQual(predicate
, econtext
))
908 * Evaluate the index row to compute expression values. We
909 * could do this by hand, but FormIndexDatum is convenient.
911 FormIndexDatum(indexInfo
,
918 * Save just the columns we care about. We copy the values
919 * into ind_context from the estate's per-tuple context.
921 for (i
= 0; i
< attr_cnt
; i
++)
923 VacAttrStats
*stats
= thisdata
->vacattrstats
[i
];
924 int attnum
= stats
->tupattnum
;
926 if (isnull
[attnum
- 1])
928 exprvals
[tcnt
] = (Datum
) 0;
929 exprnulls
[tcnt
] = true;
933 exprvals
[tcnt
] = datumCopy(values
[attnum
- 1],
934 stats
->attrtype
->typbyval
,
935 stats
->attrtype
->typlen
);
936 exprnulls
[tcnt
] = false;
944 * Having counted the number of rows that pass the predicate in the
945 * sample, we can estimate the total number of rows in the index.
947 thisdata
->tupleFract
= (double) numindexrows
/ (double) numrows
;
948 totalindexrows
= ceil(thisdata
->tupleFract
* totalrows
);
951 * Now we can compute the statistics for the expression columns.
953 if (numindexrows
> 0)
955 MemoryContextSwitchTo(col_context
);
956 for (i
= 0; i
< attr_cnt
; i
++)
958 VacAttrStats
*stats
= thisdata
->vacattrstats
[i
];
960 stats
->exprvals
= exprvals
+ i
;
961 stats
->exprnulls
= exprnulls
+ i
;
962 stats
->rowstride
= attr_cnt
;
963 stats
->compute_stats(stats
,
968 MemoryContextReset(col_context
);
973 MemoryContextSwitchTo(ind_context
);
975 ExecDropSingleTupleTableSlot(slot
);
976 FreeExecutorState(estate
);
977 MemoryContextReset(ind_context
);
980 MemoryContextSwitchTo(old_context
);
981 MemoryContextDelete(ind_context
);
985 * examine_attribute -- pre-analysis of a single column
987 * Determine whether the column is analyzable; if so, create and initialize
988 * a VacAttrStats struct for it. If not, return NULL.
990 * If index_expr isn't NULL, then we're trying to analyze an expression index,
991 * and index_expr is the expression tree representing the column's data.
993 static VacAttrStats
*
994 examine_attribute(Relation onerel
, int attnum
, Node
*index_expr
)
996 Form_pg_attribute attr
= TupleDescAttr(onerel
->rd_att
, attnum
- 1);
1002 VacAttrStats
*stats
;
1006 /* Never analyze dropped columns */
1007 if (attr
->attisdropped
)
1011 * Get attstattarget value. Set to -1 if null. (Analyze functions expect
1012 * -1 to mean use default_statistics_target; see for example
1015 atttuple
= SearchSysCache2(ATTNUM
, ObjectIdGetDatum(RelationGetRelid(onerel
)), Int16GetDatum(attnum
));
1016 if (!HeapTupleIsValid(atttuple
))
1017 elog(ERROR
, "cache lookup failed for attribute %d of relation %u",
1018 attnum
, RelationGetRelid(onerel
));
1019 dat
= SysCacheGetAttr(ATTNUM
, atttuple
, Anum_pg_attribute_attstattarget
, &isnull
);
1020 attstattarget
= isnull
? -1 : DatumGetInt16(dat
);
1021 ReleaseSysCache(atttuple
);
1023 /* Don't analyze column if user has specified not to */
1024 if (attstattarget
== 0)
1028 * Create the VacAttrStats struct.
1030 stats
= (VacAttrStats
*) palloc0(sizeof(VacAttrStats
));
1031 stats
->attstattarget
= attstattarget
;
1034 * When analyzing an expression index, believe the expression tree's type
1035 * not the column datatype --- the latter might be the opckeytype storage
1036 * type of the opclass, which is not interesting for our purposes. (Note:
1037 * if we did anything with non-expression index columns, we'd need to
1038 * figure out where to get the correct type info from, but for now that's
1039 * not a problem.) It's not clear whether anyone will care about the
1040 * typmod, but we store that too just in case.
1044 stats
->attrtypid
= exprType(index_expr
);
1045 stats
->attrtypmod
= exprTypmod(index_expr
);
1048 * If a collation has been specified for the index column, use that in
1049 * preference to anything else; but if not, fall back to whatever we
1050 * can get from the expression.
1052 if (OidIsValid(onerel
->rd_indcollation
[attnum
- 1]))
1053 stats
->attrcollid
= onerel
->rd_indcollation
[attnum
- 1];
1055 stats
->attrcollid
= exprCollation(index_expr
);
1059 stats
->attrtypid
= attr
->atttypid
;
1060 stats
->attrtypmod
= attr
->atttypmod
;
1061 stats
->attrcollid
= attr
->attcollation
;
1064 typtuple
= SearchSysCacheCopy1(TYPEOID
,
1065 ObjectIdGetDatum(stats
->attrtypid
));
1066 if (!HeapTupleIsValid(typtuple
))
1067 elog(ERROR
, "cache lookup failed for type %u", stats
->attrtypid
);
1068 stats
->attrtype
= (Form_pg_type
) GETSTRUCT(typtuple
);
1069 stats
->anl_context
= anl_context
;
1070 stats
->tupattnum
= attnum
;
1073 * The fields describing the stats->stavalues[n] element types default to
1074 * the type of the data being analyzed, but the type-specific typanalyze
1075 * function can change them if it wants to store something else.
1077 for (i
= 0; i
< STATISTIC_NUM_SLOTS
; i
++)
1079 stats
->statypid
[i
] = stats
->attrtypid
;
1080 stats
->statyplen
[i
] = stats
->attrtype
->typlen
;
1081 stats
->statypbyval
[i
] = stats
->attrtype
->typbyval
;
1082 stats
->statypalign
[i
] = stats
->attrtype
->typalign
;
1086 * Call the type-specific typanalyze function. If none is specified, use
1089 if (OidIsValid(stats
->attrtype
->typanalyze
))
1090 ok
= DatumGetBool(OidFunctionCall1(stats
->attrtype
->typanalyze
,
1091 PointerGetDatum(stats
)));
1093 ok
= std_typanalyze(stats
);
1095 if (!ok
|| stats
->compute_stats
== NULL
|| stats
->minrows
<= 0)
1097 heap_freetuple(typtuple
);
1106 * Read stream callback returning the next BlockNumber as chosen by the
1107 * BlockSampling algorithm.
1110 block_sampling_read_stream_next(ReadStream
*stream
,
1111 void *callback_private_data
,
1112 void *per_buffer_data
)
1114 BlockSamplerData
*bs
= callback_private_data
;
1116 return BlockSampler_HasMore(bs
) ? BlockSampler_Next(bs
) : InvalidBlockNumber
;
1120 * acquire_sample_rows -- acquire a random sample of rows from the table
1122 * Selected rows are returned in the caller-allocated array rows[], which
1123 * must have at least targrows entries.
1124 * The actual number of rows selected is returned as the function result.
1125 * We also estimate the total numbers of live and dead rows in the table,
1126 * and return them into *totalrows and *totaldeadrows, respectively.
1128 * The returned list of tuples is in order by physical position in the table.
1129 * (We will rely on this later to derive correlation estimates.)
1131 * As of May 2004 we use a new two-stage method: Stage one selects up
1132 * to targrows random blocks (or all blocks, if there aren't so many).
1133 * Stage two scans these blocks and uses the Vitter algorithm to create
1134 * a random sample of targrows rows (or less, if there are less in the
1135 * sample of blocks). The two stages are executed simultaneously: each
1136 * block is processed as soon as stage one returns its number and while
1137 * the rows are read stage two controls which ones are to be inserted
1140 * Although every row has an equal chance of ending up in the final
1141 * sample, this sampling method is not perfect: not every possible
1142 * sample has an equal chance of being selected. For large relations
1143 * the number of different blocks represented by the sample tends to be
1144 * too small. We can live with that for now. Improvements are welcome.
1146 * An important property of this sampling method is that because we do
1147 * look at a statistically unbiased set of blocks, we should get
1148 * unbiased estimates of the average numbers of live and dead rows per
1149 * block. The previous sampling method put too much credence in the row
1150 * density near the start of the table.
1153 acquire_sample_rows(Relation onerel
, int elevel
,
1154 HeapTuple
*rows
, int targrows
,
1155 double *totalrows
, double *totaldeadrows
)
1157 int numrows
= 0; /* # rows now in reservoir */
1158 double samplerows
= 0; /* total # rows collected */
1159 double liverows
= 0; /* # live rows seen */
1160 double deadrows
= 0; /* # dead rows seen */
1161 double rowstoskip
= -1; /* -1 means not set yet */
1162 uint32 randseed
; /* Seed for block sampler(s) */
1163 BlockNumber totalblocks
;
1164 TransactionId OldestXmin
;
1165 BlockSamplerData bs
;
1166 ReservoirStateData rstate
;
1167 TupleTableSlot
*slot
;
1169 BlockNumber nblocks
;
1170 BlockNumber blksdone
= 0;
1173 Assert(targrows
> 0);
1175 totalblocks
= RelationGetNumberOfBlocks(onerel
);
1177 /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
1178 OldestXmin
= GetOldestNonRemovableTransactionId(onerel
);
1180 /* Prepare for sampling block numbers */
1181 randseed
= pg_prng_uint32(&pg_global_prng_state
);
1182 nblocks
= BlockSampler_Init(&bs
, totalblocks
, targrows
, randseed
);
1184 /* Report sampling block numbers */
1185 pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL
,
1188 /* Prepare for sampling rows */
1189 reservoir_init_selection_state(&rstate
, targrows
);
1191 scan
= table_beginscan_analyze(onerel
);
1192 slot
= table_slot_create(onerel
, NULL
);
1194 stream
= read_stream_begin_relation(READ_STREAM_MAINTENANCE
,
1198 block_sampling_read_stream_next
,
1202 /* Outer loop over blocks to sample */
1203 while (table_scan_analyze_next_block(scan
, stream
))
1205 vacuum_delay_point();
1207 while (table_scan_analyze_next_tuple(scan
, OldestXmin
, &liverows
, &deadrows
, slot
))
1210 * The first targrows sample rows are simply copied into the
1211 * reservoir. Then we start replacing tuples in the sample until
1212 * we reach the end of the relation. This algorithm is from Jeff
1213 * Vitter's paper (see full citation in utils/misc/sampling.c). It
1214 * works by repeatedly computing the number of tuples to skip
1215 * before selecting a tuple, which replaces a randomly chosen
1216 * element of the reservoir (current set of tuples). At all times
1217 * the reservoir is a true random sample of the tuples we've
1218 * passed over so far, so when we fall off the end of the relation
1221 if (numrows
< targrows
)
1222 rows
[numrows
++] = ExecCopySlotHeapTuple(slot
);
1226 * t in Vitter's paper is the number of records already
1227 * processed. If we need to compute a new S value, we must
1228 * use the not-yet-incremented value of samplerows as t.
1231 rowstoskip
= reservoir_get_next_S(&rstate
, samplerows
, targrows
);
1233 if (rowstoskip
<= 0)
1236 * Found a suitable tuple, so save it, replacing one old
1239 int k
= (int) (targrows
* sampler_random_fract(&rstate
.randstate
));
1241 Assert(k
>= 0 && k
< targrows
);
1242 heap_freetuple(rows
[k
]);
1243 rows
[k
] = ExecCopySlotHeapTuple(slot
);
1252 pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_DONE
,
1256 read_stream_end(stream
);
1258 ExecDropSingleTupleTableSlot(slot
);
1259 table_endscan(scan
);
1262 * If we didn't find as many tuples as we wanted then we're done. No sort
1263 * is needed, since they're already in order.
1265 * Otherwise we need to sort the collected tuples by position
1266 * (itempointer). It's not worth worrying about corner cases where the
1267 * tuples are already sorted.
1269 if (numrows
== targrows
)
1270 qsort_interruptible(rows
, numrows
, sizeof(HeapTuple
),
1271 compare_rows
, NULL
);
1274 * Estimate total numbers of live and dead rows in relation, extrapolating
1275 * on the assumption that the average tuple density in pages we didn't
1276 * scan is the same as in the pages we did scan. Since what we scanned is
1277 * a random sample of the pages in the relation, this should be a good
1282 *totalrows
= floor((liverows
/ bs
.m
) * totalblocks
+ 0.5);
1283 *totaldeadrows
= floor((deadrows
/ bs
.m
) * totalblocks
+ 0.5);
1288 *totaldeadrows
= 0.0;
1292 * Emit some interesting relation info
1295 (errmsg("\"%s\": scanned %d of %u pages, "
1296 "containing %.0f live rows and %.0f dead rows; "
1297 "%d rows in sample, %.0f estimated total rows",
1298 RelationGetRelationName(onerel
),
1301 numrows
, *totalrows
)));
1307 * Comparator for sorting rows[] array
1310 compare_rows(const void *a
, const void *b
, void *arg
)
1312 HeapTuple ha
= *(const HeapTuple
*) a
;
1313 HeapTuple hb
= *(const HeapTuple
*) b
;
1314 BlockNumber ba
= ItemPointerGetBlockNumber(&ha
->t_self
);
1315 OffsetNumber oa
= ItemPointerGetOffsetNumber(&ha
->t_self
);
1316 BlockNumber bb
= ItemPointerGetBlockNumber(&hb
->t_self
);
1317 OffsetNumber ob
= ItemPointerGetOffsetNumber(&hb
->t_self
);
1332 * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
1334 * This has the same API as acquire_sample_rows, except that rows are
1335 * collected from all inheritance children as well as the specified table.
1336 * We fail and return zero if there are no inheritance children, or if all
1337 * children are foreign tables that don't support ANALYZE.
1340 acquire_inherited_sample_rows(Relation onerel
, int elevel
,
1341 HeapTuple
*rows
, int targrows
,
1342 double *totalrows
, double *totaldeadrows
)
1346 AcquireSampleRowsFunc
*acquirefuncs
;
1355 /* Initialize output parameters to zero now, in case we exit early */
1360 * Find all members of inheritance set. We only need AccessShareLock on
1364 find_all_inheritors(RelationGetRelid(onerel
), AccessShareLock
, NULL
);
1367 * Check that there's at least one descendant, else fail. This could
1368 * happen despite analyze_rel's relhassubclass check, if table once had a
1369 * child but no longer does. In that case, we can clear the
1370 * relhassubclass field so as not to make the same mistake again later.
1371 * (This is safe because we hold ShareUpdateExclusiveLock.)
1373 if (list_length(tableOIDs
) < 2)
1375 /* CCI because we already updated the pg_class row in this command */
1376 CommandCounterIncrement();
1377 SetRelationHasSubclass(RelationGetRelid(onerel
), false);
1379 (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",
1380 get_namespace_name(RelationGetNamespace(onerel
)),
1381 RelationGetRelationName(onerel
))));
1386 * Identify acquirefuncs to use, and count blocks in all the relations.
1387 * The result could overflow BlockNumber, so we use double arithmetic.
1389 rels
= (Relation
*) palloc(list_length(tableOIDs
) * sizeof(Relation
));
1390 acquirefuncs
= (AcquireSampleRowsFunc
*)
1391 palloc(list_length(tableOIDs
) * sizeof(AcquireSampleRowsFunc
));
1392 relblocks
= (double *) palloc(list_length(tableOIDs
) * sizeof(double));
1396 foreach(lc
, tableOIDs
)
1398 Oid childOID
= lfirst_oid(lc
);
1400 AcquireSampleRowsFunc acquirefunc
= NULL
;
1401 BlockNumber relpages
= 0;
1403 /* We already got the needed lock */
1404 childrel
= table_open(childOID
, NoLock
);
1406 /* Ignore if temp table of another backend */
1407 if (RELATION_IS_OTHER_TEMP(childrel
))
1409 /* ... but release the lock on it */
1410 Assert(childrel
!= onerel
);
1411 table_close(childrel
, AccessShareLock
);
1415 /* Check table type (MATVIEW can't happen, but might as well allow) */
1416 if (childrel
->rd_rel
->relkind
== RELKIND_RELATION
||
1417 childrel
->rd_rel
->relkind
== RELKIND_MATVIEW
)
1419 /* Regular table, so use the regular row acquisition function */
1420 acquirefunc
= acquire_sample_rows
;
1421 relpages
= RelationGetNumberOfBlocks(childrel
);
1423 else if (childrel
->rd_rel
->relkind
== RELKIND_FOREIGN_TABLE
)
1426 * For a foreign table, call the FDW's hook function to see
1427 * whether it supports analysis.
1429 FdwRoutine
*fdwroutine
;
1432 fdwroutine
= GetFdwRoutineForRelation(childrel
, false);
1434 if (fdwroutine
->AnalyzeForeignTable
!= NULL
)
1435 ok
= fdwroutine
->AnalyzeForeignTable(childrel
,
1441 /* ignore, but release the lock on it */
1442 Assert(childrel
!= onerel
);
1443 table_close(childrel
, AccessShareLock
);
1450 * ignore, but release the lock on it. don't try to unlock the
1451 * passed-in relation
1453 Assert(childrel
->rd_rel
->relkind
== RELKIND_PARTITIONED_TABLE
);
1454 if (childrel
!= onerel
)
1455 table_close(childrel
, AccessShareLock
);
1457 table_close(childrel
, NoLock
);
1461 /* OK, we'll process this child */
1463 rels
[nrels
] = childrel
;
1464 acquirefuncs
[nrels
] = acquirefunc
;
1465 relblocks
[nrels
] = (double) relpages
;
1466 totalblocks
+= (double) relpages
;
1471 * If we don't have at least one child table to consider, fail. If the
1472 * relation is a partitioned table, it's not counted as a child table.
1477 (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables",
1478 get_namespace_name(RelationGetNamespace(onerel
)),
1479 RelationGetRelationName(onerel
))));
1484 * Now sample rows from each relation, proportionally to its fraction of
1485 * the total block count. (This might be less than desirable if the child
1486 * rels have radically different free-space percentages, but it's not
1487 * clear that it's worth working harder.)
1489 pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_TOTAL
,
1492 for (i
= 0; i
< nrels
; i
++)
1494 Relation childrel
= rels
[i
];
1495 AcquireSampleRowsFunc acquirefunc
= acquirefuncs
[i
];
1496 double childblocks
= relblocks
[i
];
1499 * Report progress. The sampling function will normally report blocks
1500 * done/total, but we need to reset them to 0 here, so that they don't
1501 * show an old value until that.
1504 const int progress_index
[] = {
1505 PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID
,
1506 PROGRESS_ANALYZE_BLOCKS_DONE
,
1507 PROGRESS_ANALYZE_BLOCKS_TOTAL
1509 const int64 progress_vals
[] = {
1510 RelationGetRelid(childrel
),
1515 pgstat_progress_update_multi_param(3, progress_index
, progress_vals
);
1518 if (childblocks
> 0)
1522 childtargrows
= (int) rint(targrows
* childblocks
/ totalblocks
);
1523 /* Make sure we don't overrun due to roundoff error */
1524 childtargrows
= Min(childtargrows
, targrows
- numrows
);
1525 if (childtargrows
> 0)
1531 /* Fetch a random sample of the child's rows */
1532 childrows
= (*acquirefunc
) (childrel
, elevel
,
1533 rows
+ numrows
, childtargrows
,
1536 /* We may need to convert from child's rowtype to parent's */
1537 if (childrows
> 0 &&
1538 !equalRowTypes(RelationGetDescr(childrel
),
1539 RelationGetDescr(onerel
)))
1541 TupleConversionMap
*map
;
1543 map
= convert_tuples_by_name(RelationGetDescr(childrel
),
1544 RelationGetDescr(onerel
));
1549 for (j
= 0; j
< childrows
; j
++)
1553 newtup
= execute_attr_map_tuple(rows
[numrows
+ j
], map
);
1554 heap_freetuple(rows
[numrows
+ j
]);
1555 rows
[numrows
+ j
] = newtup
;
1557 free_conversion_map(map
);
1561 /* And add to counts */
1562 numrows
+= childrows
;
1563 *totalrows
+= trows
;
1564 *totaldeadrows
+= tdrows
;
1569 * Note: we cannot release the child-table locks, since we may have
1570 * pointers to their TOAST tables in the sampled rows.
1572 table_close(childrel
, NoLock
);
1573 pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_DONE
,
1582 * update_attstats() -- update attribute statistics for one relation
1584 * Statistics are stored in several places: the pg_class row for the
1585 * relation has stats about the whole relation, and there is a
1586 * pg_statistic row for each (non-system) attribute that has ever
1587 * been analyzed. The pg_class values are updated by VACUUM, not here.
1589 * pg_statistic rows are just added or updated normally. This means
1590 * that pg_statistic will probably contain some deleted rows at the
1591 * completion of a vacuum cycle, unless it happens to get vacuumed last.
1593 * To keep things simple, we punt for pg_statistic, and don't try
1594 * to compute or store rows for pg_statistic itself in pg_statistic.
1595 * This could possibly be made to work, but it's not worth the trouble.
1596 * Note analyze_rel() has seen to it that we won't come here when
1597 * vacuuming pg_statistic itself.
1599 * Note: there would be a race condition here if two backends could
1600 * ANALYZE the same table concurrently. Presently, we lock that out
1601 * by taking a self-exclusive lock on the relation in analyze_rel().
1604 update_attstats(Oid relid
, bool inh
, int natts
, VacAttrStats
**vacattrstats
)
1608 CatalogIndexState indstate
= NULL
;
1611 return; /* nothing to do */
1613 sd
= table_open(StatisticRelationId
, RowExclusiveLock
);
1615 for (attno
= 0; attno
< natts
; attno
++)
1617 VacAttrStats
*stats
= vacattrstats
[attno
];
1623 Datum values
[Natts_pg_statistic
];
1624 bool nulls
[Natts_pg_statistic
];
1625 bool replaces
[Natts_pg_statistic
];
1627 /* Ignore attr if we weren't able to collect stats */
1628 if (!stats
->stats_valid
)
1632 * Construct a new pg_statistic tuple
1634 for (i
= 0; i
< Natts_pg_statistic
; ++i
)
1640 values
[Anum_pg_statistic_starelid
- 1] = ObjectIdGetDatum(relid
);
1641 values
[Anum_pg_statistic_staattnum
- 1] = Int16GetDatum(stats
->tupattnum
);
1642 values
[Anum_pg_statistic_stainherit
- 1] = BoolGetDatum(inh
);
1643 values
[Anum_pg_statistic_stanullfrac
- 1] = Float4GetDatum(stats
->stanullfrac
);
1644 values
[Anum_pg_statistic_stawidth
- 1] = Int32GetDatum(stats
->stawidth
);
1645 values
[Anum_pg_statistic_stadistinct
- 1] = Float4GetDatum(stats
->stadistinct
);
1646 i
= Anum_pg_statistic_stakind1
- 1;
1647 for (k
= 0; k
< STATISTIC_NUM_SLOTS
; k
++)
1649 values
[i
++] = Int16GetDatum(stats
->stakind
[k
]); /* stakindN */
1651 i
= Anum_pg_statistic_staop1
- 1;
1652 for (k
= 0; k
< STATISTIC_NUM_SLOTS
; k
++)
1654 values
[i
++] = ObjectIdGetDatum(stats
->staop
[k
]); /* staopN */
1656 i
= Anum_pg_statistic_stacoll1
- 1;
1657 for (k
= 0; k
< STATISTIC_NUM_SLOTS
; k
++)
1659 values
[i
++] = ObjectIdGetDatum(stats
->stacoll
[k
]); /* stacollN */
1661 i
= Anum_pg_statistic_stanumbers1
- 1;
1662 for (k
= 0; k
< STATISTIC_NUM_SLOTS
; k
++)
1664 int nnum
= stats
->numnumbers
[k
];
1668 Datum
*numdatums
= (Datum
*) palloc(nnum
* sizeof(Datum
));
1671 for (n
= 0; n
< nnum
; n
++)
1672 numdatums
[n
] = Float4GetDatum(stats
->stanumbers
[k
][n
]);
1673 arry
= construct_array_builtin(numdatums
, nnum
, FLOAT4OID
);
1674 values
[i
++] = PointerGetDatum(arry
); /* stanumbersN */
1679 values
[i
++] = (Datum
) 0;
1682 i
= Anum_pg_statistic_stavalues1
- 1;
1683 for (k
= 0; k
< STATISTIC_NUM_SLOTS
; k
++)
1685 if (stats
->numvalues
[k
] > 0)
1689 arry
= construct_array(stats
->stavalues
[k
],
1690 stats
->numvalues
[k
],
1692 stats
->statyplen
[k
],
1693 stats
->statypbyval
[k
],
1694 stats
->statypalign
[k
]);
1695 values
[i
++] = PointerGetDatum(arry
); /* stavaluesN */
1700 values
[i
++] = (Datum
) 0;
1704 /* Is there already a pg_statistic tuple for this attribute? */
1705 oldtup
= SearchSysCache3(STATRELATTINH
,
1706 ObjectIdGetDatum(relid
),
1707 Int16GetDatum(stats
->tupattnum
),
1710 /* Open index information when we know we need it */
1711 if (indstate
== NULL
)
1712 indstate
= CatalogOpenIndexes(sd
);
1714 if (HeapTupleIsValid(oldtup
))
1716 /* Yes, replace it */
1717 stup
= heap_modify_tuple(oldtup
,
1718 RelationGetDescr(sd
),
1722 ReleaseSysCache(oldtup
);
1723 CatalogTupleUpdateWithInfo(sd
, &stup
->t_self
, stup
, indstate
);
1727 /* No, insert new tuple */
1728 stup
= heap_form_tuple(RelationGetDescr(sd
), values
, nulls
);
1729 CatalogTupleInsertWithInfo(sd
, stup
, indstate
);
1732 heap_freetuple(stup
);
1735 if (indstate
!= NULL
)
1736 CatalogCloseIndexes(indstate
);
1737 table_close(sd
, RowExclusiveLock
);
1741 * Standard fetch function for use by compute_stats subroutines.
1743 * This exists to provide some insulation between compute_stats routines
1744 * and the actual storage of the sample data.
1747 std_fetch_func(VacAttrStatsP stats
, int rownum
, bool *isNull
)
1749 int attnum
= stats
->tupattnum
;
1750 HeapTuple tuple
= stats
->rows
[rownum
];
1751 TupleDesc tupDesc
= stats
->tupDesc
;
1753 return heap_getattr(tuple
, attnum
, tupDesc
, isNull
);
1757 * Fetch function for analyzing index expressions.
1759 * We have not bothered to construct index tuples, instead the data is
1760 * just in Datum arrays.
1763 ind_fetch_func(VacAttrStatsP stats
, int rownum
, bool *isNull
)
1767 /* exprvals and exprnulls are already offset for proper column */
1768 i
= rownum
* stats
->rowstride
;
1769 *isNull
= stats
->exprnulls
[i
];
1770 return stats
->exprvals
[i
];
1774 /*==========================================================================
1776 * Code below this point represents the "standard" type-specific statistics
1777 * analysis algorithms. This code can be replaced on a per-data-type basis
1778 * by setting a nonzero value in pg_type.typanalyze.
1780 *==========================================================================
1785 * To avoid consuming too much memory during analysis and/or too much space
1786 * in the resulting pg_statistic rows, we ignore varlena datums that are wider
1787 * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV
1788 * and distinct-value calculations since a wide value is unlikely to be
1789 * duplicated at all, much less be a most-common value. For the same reason,
1790 * ignoring wide values will not affect our estimates of histogram bin
1791 * boundaries very much.
1793 #define WIDTH_THRESHOLD 1024
1795 #define swapInt(a,b) do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0)
1796 #define swapDatum(a,b) do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0)
1799 * Extra information used by the default analysis routines
1803 int count
; /* # of duplicates */
1804 int first
; /* values[] index of first occurrence */
1811 } CompareScalarsContext
;
1814 static void compute_trivial_stats(VacAttrStatsP stats
,
1815 AnalyzeAttrFetchFunc fetchfunc
,
1818 static void compute_distinct_stats(VacAttrStatsP stats
,
1819 AnalyzeAttrFetchFunc fetchfunc
,
1822 static void compute_scalar_stats(VacAttrStatsP stats
,
1823 AnalyzeAttrFetchFunc fetchfunc
,
1826 static int compare_scalars(const void *a
, const void *b
, void *arg
);
1827 static int compare_mcvs(const void *a
, const void *b
, void *arg
);
1828 static int analyze_mcv_list(int *mcv_counts
,
1837 * std_typanalyze -- the default type-specific typanalyze function
1840 std_typanalyze(VacAttrStats
*stats
)
1844 StdAnalyzeData
*mystats
;
1846 /* If the attstattarget column is negative, use the default value */
1847 if (stats
->attstattarget
< 0)
1848 stats
->attstattarget
= default_statistics_target
;
1850 /* Look for default "<" and "=" operators for column's type */
1851 get_sort_group_operators(stats
->attrtypid
,
1852 false, false, false,
1853 <opr
, &eqopr
, NULL
,
1856 /* Save the operator info for compute_stats routines */
1857 mystats
= (StdAnalyzeData
*) palloc(sizeof(StdAnalyzeData
));
1858 mystats
->eqopr
= eqopr
;
1859 mystats
->eqfunc
= OidIsValid(eqopr
) ? get_opcode(eqopr
) : InvalidOid
;
1860 mystats
->ltopr
= ltopr
;
1861 stats
->extra_data
= mystats
;
1864 * Determine which standard statistics algorithm to use
1866 if (OidIsValid(eqopr
) && OidIsValid(ltopr
))
1868 /* Seems to be a scalar datatype */
1869 stats
->compute_stats
= compute_scalar_stats
;
1870 /*--------------------
1871 * The following choice of minrows is based on the paper
1872 * "Random sampling for histogram construction: how much is enough?"
1873 * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
1874 * Proceedings of ACM SIGMOD International Conference on Management
1875 * of Data, 1998, Pages 436-447. Their Corollary 1 to Theorem 5
1876 * says that for table size n, histogram size k, maximum relative
1877 * error in bin size f, and error probability gamma, the minimum
1878 * random sample size is
1879 * r = 4 * k * ln(2*n/gamma) / f^2
1880 * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain
1882 * Note that because of the log function, the dependence on n is
1883 * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66
1884 * bin size error with probability 0.99. So there's no real need to
1885 * scale for n, which is a good thing because we don't necessarily
1886 * know it at this point.
1887 *--------------------
1889 stats
->minrows
= 300 * stats
->attstattarget
;
1891 else if (OidIsValid(eqopr
))
1893 /* We can still recognize distinct values */
1894 stats
->compute_stats
= compute_distinct_stats
;
1895 /* Might as well use the same minrows as above */
1896 stats
->minrows
= 300 * stats
->attstattarget
;
1900 /* Can't do much but the trivial stuff */
1901 stats
->compute_stats
= compute_trivial_stats
;
1902 /* Might as well use the same minrows as above */
1903 stats
->minrows
= 300 * stats
->attstattarget
;
1911 * compute_trivial_stats() -- compute very basic column statistics
1913 * We use this when we cannot find a hash "=" operator for the datatype.
1915 * We determine the fraction of non-null rows and the average datum width.
1918 compute_trivial_stats(VacAttrStatsP stats
,
1919 AnalyzeAttrFetchFunc fetchfunc
,
1925 int nonnull_cnt
= 0;
1926 double total_width
= 0;
1927 bool is_varlena
= (!stats
->attrtype
->typbyval
&&
1928 stats
->attrtype
->typlen
== -1);
1929 bool is_varwidth
= (!stats
->attrtype
->typbyval
&&
1930 stats
->attrtype
->typlen
< 0);
1932 for (i
= 0; i
< samplerows
; i
++)
1937 vacuum_delay_point();
1939 value
= fetchfunc(stats
, i
, &isnull
);
1941 /* Check for null/nonnull */
1950 * If it's a variable-width field, add up widths for average width
1951 * calculation. Note that if the value is toasted, we use the toasted
1952 * width. We don't bother with this calculation if it's a fixed-width
1957 total_width
+= VARSIZE_ANY(DatumGetPointer(value
));
1959 else if (is_varwidth
)
1961 /* must be cstring */
1962 total_width
+= strlen(DatumGetCString(value
)) + 1;
1966 /* We can only compute average width if we found some non-null values. */
1967 if (nonnull_cnt
> 0)
1969 stats
->stats_valid
= true;
1970 /* Do the simple null-frac and width stats */
1971 stats
->stanullfrac
= (double) null_cnt
/ (double) samplerows
;
1973 stats
->stawidth
= total_width
/ (double) nonnull_cnt
;
1975 stats
->stawidth
= stats
->attrtype
->typlen
;
1976 stats
->stadistinct
= 0.0; /* "unknown" */
1978 else if (null_cnt
> 0)
1980 /* We found only nulls; assume the column is entirely null */
1981 stats
->stats_valid
= true;
1982 stats
->stanullfrac
= 1.0;
1984 stats
->stawidth
= 0; /* "unknown" */
1986 stats
->stawidth
= stats
->attrtype
->typlen
;
1987 stats
->stadistinct
= 0.0; /* "unknown" */
1993 * compute_distinct_stats() -- compute column statistics including ndistinct
1995 * We use this when we can find only an "=" operator for the datatype.
1997 * We determine the fraction of non-null rows, the average width, the
1998 * most common values, and the (estimated) number of distinct values.
2000 * The most common values are determined by brute force: we keep a list
2001 * of previously seen values, ordered by number of times seen, as we scan
2002 * the samples. A newly seen value is inserted just after the last
2003 * multiply-seen value, causing the bottommost (oldest) singly-seen value
2004 * to drop off the list. The accuracy of this method, and also its cost,
2005 * depend mainly on the length of the list we are willing to keep.
static void
compute_distinct_stats(VacAttrStatsP stats,
					   AnalyzeAttrFetchFunc fetchfunc,
					   int samplerows,
					   double totalrows)
{
	int			i;
	int			null_cnt = 0;
	int			nonnull_cnt = 0;
	int			toowide_cnt = 0;
	double		total_width = 0;
	bool		is_varlena = (!stats->attrtype->typbyval &&
							  stats->attrtype->typlen == -1);
	bool		is_varwidth = (!stats->attrtype->typbyval &&
							   stats->attrtype->typlen < 0);
	FmgrInfo	f_cmpeq;
	typedef struct
	{
		Datum		value;
		int			count;
	} TrackItem;
	TrackItem  *track;
	int			track_cnt,
				track_max;
	int			num_mcv = stats->attstattarget;
	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;

	/*
	 * We track up to 2*n values for an n-element MCV list; but at least 10
	 */
	track_max = 2 * num_mcv;
	if (track_max < 10)
		track_max = 10;
	track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
	track_cnt = 0;

	fmgr_info(mystats->eqfunc, &f_cmpeq);

	for (i = 0; i < samplerows; i++)
	{
		Datum		value;
		bool		isnull;
		bool		match;
		int			firstcount1,
					j;

		vacuum_delay_point();

		value = fetchfunc(stats, i, &isnull);

		/* Check for null/nonnull */
		if (isnull)
		{
			null_cnt++;
			continue;
		}
		nonnull_cnt++;

		/*
		 * If it's a variable-width field, add up widths for average width
		 * calculation.  Note that if the value is toasted, we use the toasted
		 * width.  We don't bother with this calculation if it's a fixed-width
		 * type.
		 */
		if (is_varlena)
		{
			total_width += VARSIZE_ANY(DatumGetPointer(value));

			/*
			 * If the value is toasted, we want to detoast it just once to
			 * avoid repeated detoastings and resultant excess memory usage
			 * during the comparisons.  Also, check to see if the value is
			 * excessively wide, and if so don't detoast at all --- just
			 * ignore the value.
			 */
			if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
			{
				toowide_cnt++;
				continue;
			}
			value = PointerGetDatum(PG_DETOAST_DATUM(value));
		}
		else if (is_varwidth)
		{
			/* must be cstring */
			total_width += strlen(DatumGetCString(value)) + 1;
		}

		/*
		 * See if the value matches anything we're already tracking.
		 */
		match = false;
		firstcount1 = track_cnt;
		for (j = 0; j < track_cnt; j++)
		{
			if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
											   stats->attrcollid,
											   value, track[j].value)))
			{
				match = true;
				break;
			}
			if (j < firstcount1 && track[j].count == 1)
				firstcount1 = j;
		}

		if (match)
		{
			/* Found a match */
			track[j].count++;
			/* This value may now need to "bubble up" in the track list */
			while (j > 0 && track[j].count > track[j - 1].count)
			{
				swapDatum(track[j].value, track[j - 1].value);
				swapInt(track[j].count, track[j - 1].count);
				j--;
			}
		}
		else
		{
			/* No match.  Insert at head of count-1 list */
			if (track_cnt < track_max)
				track_cnt++;
			for (j = track_cnt - 1; j > firstcount1; j--)
			{
				track[j].value = track[j - 1].value;
				track[j].count = track[j - 1].count;
			}
			if (firstcount1 < track_cnt)
			{
				track[firstcount1].value = value;
				track[firstcount1].count = 1;
			}
		}
	}
	/* We can only compute real stats if we found some non-null values. */
	if (nonnull_cnt > 0)
	{
		int			nmultiple,
					summultiple;

		stats->stats_valid = true;
		/* Do the simple null-frac and width stats */
		stats->stanullfrac = (double) null_cnt / (double) samplerows;
		if (is_varwidth)
			stats->stawidth = total_width / (double) nonnull_cnt;
		else
			stats->stawidth = stats->attrtype->typlen;

		/* Count the number of values we found multiple times */
		summultiple = 0;
		for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
		{
			if (track[nmultiple].count == 1)
				break;
			summultiple += track[nmultiple].count;
		}

		if (nmultiple == 0)
		{
			/*
			 * If we found no repeated non-null values, assume it's a unique
			 * column; but be sure to discount for any nulls we found.
			 */
			stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
		}
		else if (track_cnt < track_max && toowide_cnt == 0 &&
				 nmultiple == track_cnt)
		{
			/*
			 * Our track list includes every value in the sample, and every
			 * value appeared more than once.  Assume the column has just
			 * these values.  (This case is meant to address columns with
			 * small, fixed sets of possible values, such as boolean or enum
			 * columns.  If there are any values that appear just once in the
			 * sample, including too-wide values, we should assume that that's
			 * not what we're dealing with.)
			 */
			stats->stadistinct = track_cnt;
		}
		else
		{
			/*----------
			 * Estimate the number of distinct values using the estimator
			 * proposed by Haas and Stokes in IBM Research Report RJ 10025:
			 *		n*d / (n - f1 + f1*n/N)
			 * where f1 is the number of distinct values that occurred
			 * exactly once in our sample of n rows (from a total of N),
			 * and d is the total number of distinct values in the sample.
			 * This is their Duj1 estimator; the other estimators they
			 * recommend are considerably more complex, and are numerically
			 * very unstable when n is much smaller than N.
			 *
			 * In this calculation, we consider only non-nulls.  We used to
			 * include rows with null values in the n and N counts, but that
			 * leads to inaccurate answers in columns with many nulls, and
			 * it's intuitively bogus anyway considering the desired result is
			 * the number of distinct non-null values.
			 *
			 * We assume (not very reliably!) that all the multiply-occurring
			 * values are reflected in the final track[] list, and the other
			 * nonnull values all appeared but once.  (XXX this usually
			 * results in a drastic overestimate of ndistinct.  Can we do
			 * any better?)
			 *----------
			 */
			int			f1 = nonnull_cnt - summultiple;
			int			d = f1 + nmultiple;
			double		n = samplerows - null_cnt;
			double		N = totalrows * (1.0 - stats->stanullfrac);
			double		stadistinct;

			/* N == 0 shouldn't happen, but just in case ... */
			if (N > 0)
				stadistinct = (n * d) / ((n - f1) + f1 * n / N);
			else
				stadistinct = 0;

			/* Clamp to sane range in case of roundoff error */
			if (stadistinct < d)
				stadistinct = d;
			if (stadistinct > N)
				stadistinct = N;
			/* And round to integer */
			stats->stadistinct = floor(stadistinct + 0.5);
		}
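		/*
		 * Worked example of the Duj1 arithmetic (added for clarity; the
		 * numbers are hypothetical): with n = 30000 non-null sample rows
		 * from a table of N = 1000000, d = 2000 distinct sample values of
		 * which f1 = 1000 were seen exactly once,
		 *		stadistinct = 30000*2000 / ((30000 - 1000) + 1000*30000/1000000)
		 *					= 60000000 / 29030 ~= 2067
		 * so the estimate sits only modestly above d when singletons are
		 * not too dominant, and it climbs toward N's scale as f1
		 * approaches n.
		 */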
		/*
		 * If we estimated the number of distinct values at more than 10% of
		 * the total row count (a very arbitrary limit), then assume that
		 * stadistinct should scale with the row count rather than be a fixed
		 * value.
		 */
		if (stats->stadistinct > 0.1 * totalrows)
			stats->stadistinct = -(stats->stadistinct / totalrows);
		/*
		 * Decide how many values are worth storing as most-common values. If
		 * we are able to generate a complete MCV list (all the values in the
		 * sample will fit, and we think these are all the ones in the table),
		 * then do so.  Otherwise, store only those values that are
		 * significantly more common than the values not in the list.
		 *
		 * Note: the first of these cases is meant to address columns with
		 * small, fixed sets of possible values, such as boolean or enum
		 * columns.  If we can *completely* represent the column population by
		 * an MCV list that will fit into the stats target, then we should do
		 * so and thus provide the planner with complete information.  But if
		 * the MCV list is not complete, it's generally worth being more
		 * selective, and not just filling it all the way up to the stats
		 * target.
		 */
		if (track_cnt < track_max && toowide_cnt == 0 &&
			stats->stadistinct > 0 &&
			track_cnt <= num_mcv)
		{
			/* Track list includes all values seen, and all will fit */
			num_mcv = track_cnt;
		}
		else
		{
			int		   *mcv_counts;

			/* Incomplete list; decide how many values are worth keeping */
			if (num_mcv > track_cnt)
				num_mcv = track_cnt;

			if (num_mcv > 0)
			{
				mcv_counts = (int *) palloc(num_mcv * sizeof(int));
				for (i = 0; i < num_mcv; i++)
					mcv_counts[i] = track[i].count;

				num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
										   stats->stadistinct,
										   stats->stanullfrac,
										   samplerows, totalrows);
			}
		}
		/* Generate MCV slot entry */
		if (num_mcv > 0)
		{
			MemoryContext old_context;
			Datum	   *mcv_values;
			float4	   *mcv_freqs;

			/* Must copy the target values into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);
			mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
			mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
			for (i = 0; i < num_mcv; i++)
			{
				mcv_values[i] = datumCopy(track[i].value,
										  stats->attrtype->typbyval,
										  stats->attrtype->typlen);
				mcv_freqs[i] = (double) track[i].count / (double) samplerows;
			}
			MemoryContextSwitchTo(old_context);

			stats->stakind[0] = STATISTIC_KIND_MCV;
			stats->staop[0] = mystats->eqopr;
			stats->stacoll[0] = stats->attrcollid;
			stats->stanumbers[0] = mcv_freqs;
			stats->numnumbers[0] = num_mcv;
			stats->stavalues[0] = mcv_values;
			stats->numvalues[0] = num_mcv;

			/*
			 * Accept the defaults for stats->statypid and others. They have
			 * been set before we were called (see vacuum.h)
			 */
		}
	}
	else if (null_cnt > 0)
	{
		/* We found only nulls; assume the column is entirely null */
		stats->stats_valid = true;
		stats->stanullfrac = 1.0;
		if (is_varwidth)
			stats->stawidth = 0;	/* "unknown" */
		else
			stats->stawidth = stats->attrtype->typlen;
		stats->stadistinct = 0.0;	/* "unknown" */
	}

	/* We don't need to bother cleaning up any of our temporary palloc's */
}
/*
 *	compute_scalar_stats() -- compute column statistics
 *
 *	We use this when we can find "=" and "<" operators for the datatype.
 *
 *	We determine the fraction of non-null rows, the average width, the
 *	most common values, the (estimated) number of distinct values, the
 *	distribution histogram, and the correlation of physical to logical order.
 *
 *	The desired stats can be determined fairly easily after sorting the
 *	data values into order.
 */
static void
compute_scalar_stats(VacAttrStatsP stats,
					 AnalyzeAttrFetchFunc fetchfunc,
					 int samplerows,
					 double totalrows)
{
	int			i;
	int			null_cnt = 0;
	int			nonnull_cnt = 0;
	int			toowide_cnt = 0;
	double		total_width = 0;
	bool		is_varlena = (!stats->attrtype->typbyval &&
							  stats->attrtype->typlen == -1);
	bool		is_varwidth = (!stats->attrtype->typbyval &&
							   stats->attrtype->typlen < 0);
	double		corr_xysum;
	SortSupportData ssup;
	ScalarItem *values;
	int			values_cnt = 0;
	int		   *tupnoLink;
	ScalarMCVItem *track;
	int			track_cnt = 0;
	int			num_mcv = stats->attstattarget;
	int			num_bins = stats->attstattarget;
	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;

	values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
	tupnoLink = (int *) palloc(samplerows * sizeof(int));
	track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));

	memset(&ssup, 0, sizeof(ssup));
	ssup.ssup_cxt = CurrentMemoryContext;
	ssup.ssup_collation = stats->attrcollid;
	ssup.ssup_nulls_first = false;

	/*
	 * For now, don't perform abbreviated key conversion, because full values
	 * are required for MCV slot generation.  Supporting that optimization
	 * would necessitate teaching compare_scalars() to call a tie-breaker.
	 */
	ssup.abbreviate = false;

	PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
	/* Initial scan to find sortable values */
	for (i = 0; i < samplerows; i++)
	{
		Datum		value;
		bool		isnull;

		vacuum_delay_point();

		value = fetchfunc(stats, i, &isnull);

		/* Check for null/nonnull */
		if (isnull)
		{
			null_cnt++;
			continue;
		}
		nonnull_cnt++;

		/*
		 * If it's a variable-width field, add up widths for average width
		 * calculation.  Note that if the value is toasted, we use the toasted
		 * width.  We don't bother with this calculation if it's a fixed-width
		 * type.
		 */
		if (is_varlena)
		{
			total_width += VARSIZE_ANY(DatumGetPointer(value));

			/*
			 * If the value is toasted, we want to detoast it just once to
			 * avoid repeated detoastings and resultant excess memory usage
			 * during the comparisons.  Also, check to see if the value is
			 * excessively wide, and if so don't detoast at all --- just
			 * ignore the value.
			 */
			if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
			{
				toowide_cnt++;
				continue;
			}
			value = PointerGetDatum(PG_DETOAST_DATUM(value));
		}
		else if (is_varwidth)
		{
			/* must be cstring */
			total_width += strlen(DatumGetCString(value)) + 1;
		}

		/* Add it to the list to be sorted */
		values[values_cnt].value = value;
		values[values_cnt].tupno = values_cnt;
		tupnoLink[values_cnt] = values_cnt;
		values_cnt++;
	}
	/* We can only compute real stats if we found some sortable values. */
	if (values_cnt > 0)
	{
		int			ndistinct,	/* # distinct values in sample */
					nmultiple,	/* # that appear multiple times */
					num_hist,
					dups_cnt;
		int			slot_idx = 0;
		CompareScalarsContext cxt;

		/* Sort the collected values */
		cxt.ssup = &ssup;
		cxt.tupnoLink = tupnoLink;
		qsort_interruptible(values, values_cnt, sizeof(ScalarItem),
							compare_scalars, &cxt);

		/*
		 * Now scan the values in order, find the most common ones, and also
		 * accumulate ordering-correlation statistics.
		 *
		 * To determine which are most common, we first have to count the
		 * number of duplicates of each value.  The duplicates are adjacent in
		 * the sorted list, so a brute-force approach is to compare successive
		 * datum values until we find two that are not equal.  However, that
		 * requires N-1 invocations of the datum comparison routine, which are
		 * completely redundant with work that was done during the sort.  (The
		 * sort algorithm must at some point have compared each pair of items
		 * that are adjacent in the sorted order; otherwise it could not know
		 * that it's ordered the pair correctly.)  We exploit this by having
		 * compare_scalars remember the highest tupno index that each
		 * ScalarItem has been found equal to.  At the end of the sort, a
		 * ScalarItem's tupnoLink will still point to itself if and only if it
		 * is the last item of its group of duplicates (since the group will
		 * be ordered by tupno).
		 */
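		/*
		 * Illustrative example (added for clarity): suppose the sorted
		 * values[] holds the datums, with original tupnos in parentheses,
		 *		10(4)  10(7)  10(9)  20(2)  20(5)
		 * The sort's comparisons of equal datums bump each of tupnoLink[4]
		 * and tupnoLink[7] to some higher tupno in the group, and
		 * tupnoLink[2] to 5, while tupnoLink[9] == 9 and tupnoLink[5] == 5
		 * remain self-links.  The scan below therefore detects the group
		 * boundaries (dups_cnt 3 and 2) without calling the datum comparator
		 * again.
		 */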
		corr_xysum = 0;
		ndistinct = 0;
		nmultiple = 0;
		dups_cnt = 0;
		for (i = 0; i < values_cnt; i++)
		{
			int			tupno = values[i].tupno;

			corr_xysum += ((double) i) * ((double) tupno);
			dups_cnt++;
			if (tupnoLink[tupno] == tupno)
			{
				/* Reached end of duplicates of this value */
				ndistinct++;
				if (dups_cnt > 1)
				{
					nmultiple++;
					if (track_cnt < num_mcv ||
						dups_cnt > track[track_cnt - 1].count)
					{
						/*
						 * Found a new item for the mcv list; find its
						 * position, bubbling down old items if needed. Loop
						 * invariant is that j points at an empty/replaceable
						 * slot.
						 */
						int			j;

						if (track_cnt < num_mcv)
							track_cnt++;
						for (j = track_cnt - 1; j > 0; j--)
						{
							if (dups_cnt <= track[j - 1].count)
								break;
							track[j].count = track[j - 1].count;
							track[j].first = track[j - 1].first;
						}
						track[j].count = dups_cnt;
						track[j].first = i + 1 - dups_cnt;
					}
				}
				dups_cnt = 0;
			}
		}
		stats->stats_valid = true;
		/* Do the simple null-frac and width stats */
		stats->stanullfrac = (double) null_cnt / (double) samplerows;
		if (is_varwidth)
			stats->stawidth = total_width / (double) nonnull_cnt;
		else
			stats->stawidth = stats->attrtype->typlen;

		if (nmultiple == 0)
		{
			/*
			 * If we found no repeated non-null values, assume it's a unique
			 * column; but be sure to discount for any nulls we found.
			 */
			stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
		}
		else if (toowide_cnt == 0 && nmultiple == ndistinct)
		{
			/*
			 * Every value in the sample appeared more than once.  Assume the
			 * column has just these values.  (This case is meant to address
			 * columns with small, fixed sets of possible values, such as
			 * boolean or enum columns.  If there are any values that appear
			 * just once in the sample, including too-wide values, we should
			 * assume that that's not what we're dealing with.)
			 */
			stats->stadistinct = ndistinct;
		}
		else
		{
			/*----------
			 * Estimate the number of distinct values using the estimator
			 * proposed by Haas and Stokes in IBM Research Report RJ 10025:
			 *		n*d / (n - f1 + f1*n/N)
			 * where f1 is the number of distinct values that occurred
			 * exactly once in our sample of n rows (from a total of N),
			 * and d is the total number of distinct values in the sample.
			 * This is their Duj1 estimator; the other estimators they
			 * recommend are considerably more complex, and are numerically
			 * very unstable when n is much smaller than N.
			 *
			 * In this calculation, we consider only non-nulls.  We used to
			 * include rows with null values in the n and N counts, but that
			 * leads to inaccurate answers in columns with many nulls, and
			 * it's intuitively bogus anyway considering the desired result is
			 * the number of distinct non-null values.
			 *
			 * Overwidth values are assumed to have been distinct.
			 *----------
			 */
			int			f1 = ndistinct - nmultiple + toowide_cnt;
			int			d = f1 + nmultiple;
			double		n = samplerows - null_cnt;
			double		N = totalrows * (1.0 - stats->stanullfrac);
			double		stadistinct;

			/* N == 0 shouldn't happen, but just in case ... */
			if (N > 0)
				stadistinct = (n * d) / ((n - f1) + f1 * n / N);
			else
				stadistinct = 0;

			/* Clamp to sane range in case of roundoff error */
			if (stadistinct < d)
				stadistinct = d;
			if (stadistinct > N)
				stadistinct = N;
			/* And round to integer */
			stats->stadistinct = floor(stadistinct + 0.5);
		}
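		/*
		 * Example of the overwidth adjustment (added for clarity;
		 * hypothetical numbers): if the sorted sample held ndistinct = 500
		 * values of which nmultiple = 300 repeated, and 20 further values
		 * were skipped as too wide, then f1 = 500 - 300 + 20 = 220 presumed
		 * singletons and d = 220 + 300 = 520 distinct values go into the
		 * same Duj1 formula as above.
		 */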
		/*
		 * If we estimated the number of distinct values at more than 10% of
		 * the total row count (a very arbitrary limit), then assume that
		 * stadistinct should scale with the row count rather than be a fixed
		 * value.
		 */
		if (stats->stadistinct > 0.1 * totalrows)
			stats->stadistinct = -(stats->stadistinct / totalrows);
		/*
		 * Decide how many values are worth storing as most-common values. If
		 * we are able to generate a complete MCV list (all the values in the
		 * sample will fit, and we think these are all the ones in the table),
		 * then do so.  Otherwise, store only those values that are
		 * significantly more common than the values not in the list.
		 *
		 * Note: the first of these cases is meant to address columns with
		 * small, fixed sets of possible values, such as boolean or enum
		 * columns.  If we can *completely* represent the column population by
		 * an MCV list that will fit into the stats target, then we should do
		 * so and thus provide the planner with complete information.  But if
		 * the MCV list is not complete, it's generally worth being more
		 * selective, and not just filling it all the way up to the stats
		 * target.
		 */
		if (track_cnt == ndistinct && toowide_cnt == 0 &&
			stats->stadistinct > 0 &&
			track_cnt <= num_mcv)
		{
			/* Track list includes all values seen, and all will fit */
			num_mcv = track_cnt;
		}
		else
		{
			int		   *mcv_counts;

			/* Incomplete list; decide how many values are worth keeping */
			if (num_mcv > track_cnt)
				num_mcv = track_cnt;

			if (num_mcv > 0)
			{
				mcv_counts = (int *) palloc(num_mcv * sizeof(int));
				for (i = 0; i < num_mcv; i++)
					mcv_counts[i] = track[i].count;

				num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
										   stats->stadistinct,
										   stats->stanullfrac,
										   samplerows, totalrows);
			}
		}
		/* Generate MCV slot entry */
		if (num_mcv > 0)
		{
			MemoryContext old_context;
			Datum	   *mcv_values;
			float4	   *mcv_freqs;

			/* Must copy the target values into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);
			mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
			mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
			for (i = 0; i < num_mcv; i++)
			{
				mcv_values[i] = datumCopy(values[track[i].first].value,
										  stats->attrtype->typbyval,
										  stats->attrtype->typlen);
				mcv_freqs[i] = (double) track[i].count / (double) samplerows;
			}
			MemoryContextSwitchTo(old_context);

			stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
			stats->staop[slot_idx] = mystats->eqopr;
			stats->stacoll[slot_idx] = stats->attrcollid;
			stats->stanumbers[slot_idx] = mcv_freqs;
			stats->numnumbers[slot_idx] = num_mcv;
			stats->stavalues[slot_idx] = mcv_values;
			stats->numvalues[slot_idx] = num_mcv;

			/*
			 * Accept the defaults for stats->statypid and others. They have
			 * been set before we were called (see vacuum.h)
			 */
			slot_idx++;
		}
		/*
		 * Generate a histogram slot entry if there are at least two distinct
		 * values not accounted for in the MCV list.  (This ensures the
		 * histogram won't collapse to empty or a singleton.)
		 */
		num_hist = ndistinct - num_mcv;
		if (num_hist > num_bins)
			num_hist = num_bins + 1;
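		/*
		 * Example (added for clarity; hypothetical numbers): with
		 * ndistinct = 1500 sample values, num_mcv = 100 of them absorbed by
		 * the MCV list, and num_bins = 100, num_hist starts at 1400 and is
		 * clamped to num_bins + 1 = 101, i.e. the 101 boundary values that
		 * delimit 100 equal-population bins.
		 */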
		if (num_hist >= 2)
		{
			MemoryContext old_context;
			Datum	   *hist_values;
			int			nvals;
			int			pos,
						posfrac,
						delta,
						deltafrac;

			/* Sort the MCV items into position order to speed next loop */
			qsort_interruptible(track, num_mcv, sizeof(ScalarMCVItem),
								compare_mcvs, NULL);

			/*
			 * Collapse out the MCV items from the values[] array.
			 *
			 * Note we destroy the values[] array here... but we don't need it
			 * for anything more.  We do, however, still need values_cnt.
			 * nvals will be the number of remaining entries in values[].
			 */
			if (num_mcv > 0)
			{
				int			src,
							dest;
				int			j;

				src = dest = 0;
				j = 0;			/* index of next interesting MCV item */
				while (src < values_cnt)
				{
					int			ncopy;

					if (j < num_mcv)
					{
						int			first = track[j].first;

						if (src >= first)
						{
							/* advance past this MCV item */
							src = first + track[j].count;
							j++;
							continue;
						}
						ncopy = first - src;
					}
					else
						ncopy = values_cnt - src;
					memmove(&values[dest], &values[src],
							ncopy * sizeof(ScalarItem));
					src += ncopy;
					dest += ncopy;
				}
				nvals = dest;
			}
			else
				nvals = values_cnt;
			Assert(nvals >= num_hist);

			/* Must copy the target values into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);
			hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
			/*
			 * The object of this loop is to copy the first and last values[]
			 * entries along with evenly-spaced values in between.  So the
			 * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)].  But
			 * computing that subscript directly risks integer overflow when
			 * the stats target is more than a couple thousand.  Instead we
			 * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
			 * the integral and fractional parts of the sum separately.
			 */
			delta = (nvals - 1) / (num_hist - 1);
			deltafrac = (nvals - 1) % (num_hist - 1);
			pos = posfrac = 0;
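			/*
			 * Worked example (added for clarity; hypothetical numbers): with
			 * nvals = 1000 and num_hist = 101, delta = 999 / 100 = 9 and
			 * deltafrac = 999 % 100 = 99.  Each step advances pos by 9 and
			 * posfrac by 99, and posfrac overflows the implicit denominator
			 * of 100 on all but one step; over the 100 steps pos gains
			 * 9 * 100 + 99 carries = 999, landing exactly on
			 * values[nvals - 1].
			 */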
			for (i = 0; i < num_hist; i++)
			{
				hist_values[i] = datumCopy(values[pos].value,
										   stats->attrtype->typbyval,
										   stats->attrtype->typlen);
				pos += delta;
				posfrac += deltafrac;
				if (posfrac >= (num_hist - 1))
				{
					/* fractional part exceeds 1, carry to integer part */
					pos++;
					posfrac -= (num_hist - 1);
				}
			}

			MemoryContextSwitchTo(old_context);

			stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
			stats->staop[slot_idx] = mystats->ltopr;
			stats->stacoll[slot_idx] = stats->attrcollid;
			stats->stavalues[slot_idx] = hist_values;
			stats->numvalues[slot_idx] = num_hist;

			/*
			 * Accept the defaults for stats->statypid and others. They have
			 * been set before we were called (see vacuum.h)
			 */
			slot_idx++;
		}
		/* Generate a correlation entry if there are multiple values */
		if (values_cnt > 1)
		{
			MemoryContext old_context;
			float4	   *corrs;
			double		corr_xsum,
						corr_x2sum;

			/* Must copy the target values into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);
			corrs = (float4 *) palloc(sizeof(float4));
			MemoryContextSwitchTo(old_context);

			/*----------
			 * Since we know the x and y value sets are both
			 *		0, 1, ..., values_cnt-1
			 * we have sum(x) = sum(y) =
			 *		(values_cnt-1)*values_cnt / 2
			 * and sum(x^2) = sum(y^2) =
			 *		(values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
			 *----------
			 */
			corr_xsum = ((double) (values_cnt - 1)) *
				((double) values_cnt) / 2.0;
			corr_x2sum = ((double) (values_cnt - 1)) *
				((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0;

			/* And the correlation coefficient reduces to */
			corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
				(values_cnt * corr_x2sum - corr_xsum * corr_xsum);
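			/*
			 * (Added for clarity.)  This is Pearson's r specialized to these
			 * inputs: x is the position in sorted order and y the position
			 * in physical order, both ranging over 0 .. values_cnt-1, so
			 * sum(x) = sum(y) and sum(x^2) = sum(y^2), giving
			 *		r = (n*sum(xy) - sum(x)*sum(y)) / (n*sum(x^2) - sum(x)^2)
			 * A physically ascending column makes corr_xysum equal sum(x^2)
			 * and hence r = 1; a perfectly descending one gives r = -1.
			 */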
			stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
			stats->staop[slot_idx] = mystats->ltopr;
			stats->stacoll[slot_idx] = stats->attrcollid;
			stats->stanumbers[slot_idx] = corrs;
			stats->numnumbers[slot_idx] = 1;
			slot_idx++;
		}
	}
	else if (nonnull_cnt > 0)
	{
		/* We found some non-null values, but they were all too wide */
		Assert(nonnull_cnt == toowide_cnt);
		stats->stats_valid = true;
		/* Do the simple null-frac and width stats */
		stats->stanullfrac = (double) null_cnt / (double) samplerows;
		if (is_varwidth)
			stats->stawidth = total_width / (double) nonnull_cnt;
		else
			stats->stawidth = stats->attrtype->typlen;
		/* Assume all too-wide values are distinct, so it's a unique column */
		stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
	}
	else if (null_cnt > 0)
	{
		/* We found only nulls; assume the column is entirely null */
		stats->stats_valid = true;
		stats->stanullfrac = 1.0;
		if (is_varwidth)
			stats->stawidth = 0;	/* "unknown" */
		else
			stats->stawidth = stats->attrtype->typlen;
		stats->stadistinct = 0.0;	/* "unknown" */
	}
	/* We don't need to bother cleaning up any of our temporary palloc's */
}

/*
 * Comparator for sorting ScalarItems
 *
 * Aside from sorting the items, we update the tupnoLink[] array
 * whenever two ScalarItems are found to contain equal datums.  The array
 * is indexed by tupno; for each ScalarItem, it contains the highest
 * tupno that that item's datum has been found to be equal to.  This allows
 * us to avoid additional comparisons in compute_scalar_stats().
 */
static int
compare_scalars(const void *a, const void *b, void *arg)
{
	Datum		da = ((const ScalarItem *) a)->value;
	int			ta = ((const ScalarItem *) a)->tupno;
	Datum		db = ((const ScalarItem *) b)->value;
	int			tb = ((const ScalarItem *) b)->tupno;
	CompareScalarsContext *cxt = (CompareScalarsContext *) arg;
	int			compare;

	compare = ApplySortComparator(da, false, db, false, cxt->ssup);
	if (compare != 0)
		return compare;

	/*
	 * The two datums are equal, so update cxt->tupnoLink[].
	 */
	if (cxt->tupnoLink[ta] < tb)
		cxt->tupnoLink[ta] = tb;
	if (cxt->tupnoLink[tb] < ta)
		cxt->tupnoLink[tb] = ta;
	/*
	 * For equal datums, sort by tupno
	 */
	return ta - tb;
}

/*
 * Comparator for sorting ScalarMCVItems by position
 */
static int
compare_mcvs(const void *a, const void *b, void *arg)
{
	int			da = ((const ScalarMCVItem *) a)->first;
	int			db = ((const ScalarMCVItem *) b)->first;

	return da - db;
}
/*
 * Analyze the list of common values in the sample and decide how many are
 * worth storing in the table's MCV list.
 *
 * mcv_counts is assumed to be a list of the counts of the most common values
 * seen in the sample, starting with the most common.  The return value is the
 * number that are significantly more common than the values not in the list,
 * and which are therefore deemed worth storing in the table's MCV list.
 */
static int
analyze_mcv_list(int *mcv_counts,
				 int num_mcv,
				 double stadistinct,
				 double stanullfrac,
				 int samplerows,
				 double totalrows)
{
	double		ndistinct_table;
	double		sumcount;
	int			i;

	/*
	 * If the entire table was sampled, keep the whole list.  This also
	 * protects us against division by zero in the code below.
	 */
	if (samplerows == totalrows || totalrows <= 1.0)
		return num_mcv;

	/* Re-extract the estimated number of distinct nonnull values in table */
	ndistinct_table = stadistinct;
	if (ndistinct_table < 0)
		ndistinct_table = -ndistinct_table * totalrows;

	/*
	 * Exclude the least common values from the MCV list, if they are not
	 * significantly more common than the estimated selectivity they would
	 * have if they weren't in the list.  All non-MCV values are assumed to be
	 * equally common, after taking into account the frequencies of all the
	 * values in the MCV list and the number of nulls (c.f. eqsel()).
	 *
	 * Here sumcount tracks the total count of all but the last (least common)
	 * value in the MCV list, allowing us to determine the effect of excluding
	 * that value from the list.
	 *
	 * Note that we deliberately do this by removing values from the full
	 * list, rather than starting with an empty list and adding values,
	 * because the latter approach can fail to add any values if all the most
	 * common values have around the same frequency and make up the majority
	 * of the table, so that the overall average frequency of all values is
	 * roughly the same as that of the common values.  This would lead to any
	 * uncommon values being significantly overestimated.
	 */
	sumcount = 0.0;
	for (i = 0; i < num_mcv - 1; i++)
		sumcount += mcv_counts[i];
	while (num_mcv > 0)
	{
		double		selec,
					otherdistinct,
					N,
					n,
					K,
					variance,
					stddev;

		/*
		 * Estimated selectivity the least common value would have if it
		 * wasn't in the MCV list (c.f. eqsel()).
		 */
		selec = 1.0 - sumcount / samplerows - stanullfrac;
		if (selec < 0.0)
			selec = 0.0;
		if (selec > 1.0)
			selec = 1.0;
		otherdistinct = ndistinct_table - (num_mcv - 1);
		if (otherdistinct > 1)
			selec /= otherdistinct;
		/*
		 * If the value is kept in the MCV list, its population frequency is
		 * assumed to equal its sample frequency.  We use the lower end of a
		 * textbook continuity-corrected Wald-type confidence interval to
		 * determine if that is significantly more common than the non-MCV
		 * frequency --- specifically we assume the population frequency is
		 * highly likely to be within around 2 standard errors of the sample
		 * frequency, which equates to an interval of 2 standard deviations
		 * either side of the sample count, plus an additional 0.5 for the
		 * continuity correction.  Since we are sampling without replacement,
		 * this is a hypergeometric distribution.
		 *
		 * XXX: Empirically, this approach seems to work quite well, but it
		 * may be worth considering more advanced techniques for estimating
		 * the confidence interval of the hypergeometric distribution.
		 */
		N = totalrows;
		n = samplerows;
		K = N * mcv_counts[num_mcv - 1] / n;
		variance = n * K * (N - K) * (N - n) / (N * N * (N - 1));
		stddev = sqrt(variance);
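		/*
		 * Worked example (added for clarity; hypothetical numbers): with
		 * N = 1000000, n = 30000, and a least common candidate seen 90
		 * times, K = 1000000 * 90 / 30000 = 3000, variance ~= 87.0, and
		 * stddev ~= 9.3.  If the non-MCV estimate were selec * samplerows =
		 * 60, the cutoff below is 60 + 2 * 9.3 + 0.5 ~= 79.2, so a sample
		 * count of 90 is significantly higher and the value stays listed.
		 */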
		if (mcv_counts[num_mcv - 1] > selec * samplerows + 2 * stddev + 0.5)
		{
			/*
			 * The value is significantly more common than the non-MCV
			 * selectivity would suggest.  Keep it, and all the other more
			 * common values in the list.
			 */
			break;
		}
		else
		{
			/* Discard this value and consider the next least common value */
			num_mcv--;
			if (num_mcv == 0)
				break;
			sumcount -= mcv_counts[num_mcv - 1];