1 /*-------------------------------------------------------------------------
4 * functions for instrumentation of plan execution
7 * Copyright (c) 2001-2024, PostgreSQL Global Development Group
10 * src/backend/executor/instrument.c
12 *-------------------------------------------------------------------------
18 #include "executor/instrument.h"
20 BufferUsage pgBufferUsage
;
21 static BufferUsage save_pgBufferUsage
;
23 static WalUsage save_pgWalUsage
;
25 static void BufferUsageAdd(BufferUsage
*dst
, const BufferUsage
*add
);
26 static void WalUsageAdd(WalUsage
*dst
, WalUsage
*add
);
29 /* Allocate new instrumentation structure(s) */
31 InstrAlloc(int n
, int instrument_options
, bool async_mode
)
33 Instrumentation
*instr
;
35 /* initialize all fields to zeroes, then modify as needed */
36 instr
= palloc0(n
* sizeof(Instrumentation
));
37 if (instrument_options
& (INSTRUMENT_BUFFERS
| INSTRUMENT_TIMER
| INSTRUMENT_WAL
))
39 bool need_buffers
= (instrument_options
& INSTRUMENT_BUFFERS
) != 0;
40 bool need_wal
= (instrument_options
& INSTRUMENT_WAL
) != 0;
41 bool need_timer
= (instrument_options
& INSTRUMENT_TIMER
) != 0;
44 for (i
= 0; i
< n
; i
++)
46 instr
[i
].need_bufusage
= need_buffers
;
47 instr
[i
].need_walusage
= need_wal
;
48 instr
[i
].need_timer
= need_timer
;
49 instr
[i
].async_mode
= async_mode
;
56 /* Initialize a pre-allocated instrumentation structure. */
58 InstrInit(Instrumentation
*instr
, int instrument_options
)
60 memset(instr
, 0, sizeof(Instrumentation
));
61 instr
->need_bufusage
= (instrument_options
& INSTRUMENT_BUFFERS
) != 0;
62 instr
->need_walusage
= (instrument_options
& INSTRUMENT_WAL
) != 0;
63 instr
->need_timer
= (instrument_options
& INSTRUMENT_TIMER
) != 0;
66 /* Entry to a plan node */
68 InstrStartNode(Instrumentation
*instr
)
70 if (instr
->need_timer
&&
71 !INSTR_TIME_SET_CURRENT_LAZY(instr
->starttime
))
72 elog(ERROR
, "InstrStartNode called twice in a row");
74 /* save buffer usage totals at node entry, if needed */
75 if (instr
->need_bufusage
)
76 instr
->bufusage_start
= pgBufferUsage
;
78 if (instr
->need_walusage
)
79 instr
->walusage_start
= pgWalUsage
;
82 /* Exit from a plan node */
84 InstrStopNode(Instrumentation
*instr
, double nTuples
)
86 double save_tuplecount
= instr
->tuplecount
;
89 /* count the returned tuples */
90 instr
->tuplecount
+= nTuples
;
92 /* let's update the time only if the timer was requested */
93 if (instr
->need_timer
)
95 if (INSTR_TIME_IS_ZERO(instr
->starttime
))
96 elog(ERROR
, "InstrStopNode called without start");
98 INSTR_TIME_SET_CURRENT(endtime
);
99 INSTR_TIME_ACCUM_DIFF(instr
->counter
, endtime
, instr
->starttime
);
101 INSTR_TIME_SET_ZERO(instr
->starttime
);
104 /* Add delta of buffer usage since entry to node's totals */
105 if (instr
->need_bufusage
)
106 BufferUsageAccumDiff(&instr
->bufusage
,
107 &pgBufferUsage
, &instr
->bufusage_start
);
109 if (instr
->need_walusage
)
110 WalUsageAccumDiff(&instr
->walusage
,
111 &pgWalUsage
, &instr
->walusage_start
);
113 /* Is this the first tuple of this cycle? */
116 instr
->running
= true;
117 instr
->firsttuple
= INSTR_TIME_GET_DOUBLE(instr
->counter
);
122 * In async mode, if the plan node hadn't emitted any tuples before,
123 * this might be the first tuple
125 if (instr
->async_mode
&& save_tuplecount
< 1.0)
126 instr
->firsttuple
= INSTR_TIME_GET_DOUBLE(instr
->counter
);
130 /* Update tuple count */
132 InstrUpdateTupleCount(Instrumentation
*instr
, double nTuples
)
134 /* count the returned tuples */
135 instr
->tuplecount
+= nTuples
;
138 /* Finish a run cycle for a plan node */
140 InstrEndLoop(Instrumentation
*instr
)
144 /* Skip if nothing has happened, or already shut down */
148 if (!INSTR_TIME_IS_ZERO(instr
->starttime
))
149 elog(ERROR
, "InstrEndLoop called on running node");
151 /* Accumulate per-cycle statistics into totals */
152 totaltime
= INSTR_TIME_GET_DOUBLE(instr
->counter
);
154 instr
->startup
+= instr
->firsttuple
;
155 instr
->total
+= totaltime
;
156 instr
->ntuples
+= instr
->tuplecount
;
159 /* Reset for next cycle (if any) */
160 instr
->running
= false;
161 INSTR_TIME_SET_ZERO(instr
->starttime
);
162 INSTR_TIME_SET_ZERO(instr
->counter
);
163 instr
->firsttuple
= 0;
164 instr
->tuplecount
= 0;
167 /* aggregate instrumentation information */
169 InstrAggNode(Instrumentation
*dst
, Instrumentation
*add
)
171 if (!dst
->running
&& add
->running
)
174 dst
->firsttuple
= add
->firsttuple
;
176 else if (dst
->running
&& add
->running
&& dst
->firsttuple
> add
->firsttuple
)
177 dst
->firsttuple
= add
->firsttuple
;
179 INSTR_TIME_ADD(dst
->counter
, add
->counter
);
181 dst
->tuplecount
+= add
->tuplecount
;
182 dst
->startup
+= add
->startup
;
183 dst
->total
+= add
->total
;
184 dst
->ntuples
+= add
->ntuples
;
185 dst
->ntuples2
+= add
->ntuples2
;
186 dst
->nloops
+= add
->nloops
;
187 dst
->nfiltered1
+= add
->nfiltered1
;
188 dst
->nfiltered2
+= add
->nfiltered2
;
190 /* Add delta of buffer usage since entry to node's totals */
191 if (dst
->need_bufusage
)
192 BufferUsageAdd(&dst
->bufusage
, &add
->bufusage
);
194 if (dst
->need_walusage
)
195 WalUsageAdd(&dst
->walusage
, &add
->walusage
);
198 /* note current values during parallel executor startup */
200 InstrStartParallelQuery(void)
202 save_pgBufferUsage
= pgBufferUsage
;
203 save_pgWalUsage
= pgWalUsage
;
206 /* report usage after parallel executor shutdown */
208 InstrEndParallelQuery(BufferUsage
*bufusage
, WalUsage
*walusage
)
210 memset(bufusage
, 0, sizeof(BufferUsage
));
211 BufferUsageAccumDiff(bufusage
, &pgBufferUsage
, &save_pgBufferUsage
);
212 memset(walusage
, 0, sizeof(WalUsage
));
213 WalUsageAccumDiff(walusage
, &pgWalUsage
, &save_pgWalUsage
);
216 /* accumulate work done by workers in leader's stats */
218 InstrAccumParallelQuery(BufferUsage
*bufusage
, WalUsage
*walusage
)
220 BufferUsageAdd(&pgBufferUsage
, bufusage
);
221 WalUsageAdd(&pgWalUsage
, walusage
);
226 BufferUsageAdd(BufferUsage
*dst
, const BufferUsage
*add
)
228 dst
->shared_blks_hit
+= add
->shared_blks_hit
;
229 dst
->shared_blks_read
+= add
->shared_blks_read
;
230 dst
->shared_blks_dirtied
+= add
->shared_blks_dirtied
;
231 dst
->shared_blks_written
+= add
->shared_blks_written
;
232 dst
->local_blks_hit
+= add
->local_blks_hit
;
233 dst
->local_blks_read
+= add
->local_blks_read
;
234 dst
->local_blks_dirtied
+= add
->local_blks_dirtied
;
235 dst
->local_blks_written
+= add
->local_blks_written
;
236 dst
->temp_blks_read
+= add
->temp_blks_read
;
237 dst
->temp_blks_written
+= add
->temp_blks_written
;
238 INSTR_TIME_ADD(dst
->shared_blk_read_time
, add
->shared_blk_read_time
);
239 INSTR_TIME_ADD(dst
->shared_blk_write_time
, add
->shared_blk_write_time
);
240 INSTR_TIME_ADD(dst
->local_blk_read_time
, add
->local_blk_read_time
);
241 INSTR_TIME_ADD(dst
->local_blk_write_time
, add
->local_blk_write_time
);
242 INSTR_TIME_ADD(dst
->temp_blk_read_time
, add
->temp_blk_read_time
);
243 INSTR_TIME_ADD(dst
->temp_blk_write_time
, add
->temp_blk_write_time
);
246 /* dst += add - sub */
248 BufferUsageAccumDiff(BufferUsage
*dst
,
249 const BufferUsage
*add
,
250 const BufferUsage
*sub
)
252 dst
->shared_blks_hit
+= add
->shared_blks_hit
- sub
->shared_blks_hit
;
253 dst
->shared_blks_read
+= add
->shared_blks_read
- sub
->shared_blks_read
;
254 dst
->shared_blks_dirtied
+= add
->shared_blks_dirtied
- sub
->shared_blks_dirtied
;
255 dst
->shared_blks_written
+= add
->shared_blks_written
- sub
->shared_blks_written
;
256 dst
->local_blks_hit
+= add
->local_blks_hit
- sub
->local_blks_hit
;
257 dst
->local_blks_read
+= add
->local_blks_read
- sub
->local_blks_read
;
258 dst
->local_blks_dirtied
+= add
->local_blks_dirtied
- sub
->local_blks_dirtied
;
259 dst
->local_blks_written
+= add
->local_blks_written
- sub
->local_blks_written
;
260 dst
->temp_blks_read
+= add
->temp_blks_read
- sub
->temp_blks_read
;
261 dst
->temp_blks_written
+= add
->temp_blks_written
- sub
->temp_blks_written
;
262 INSTR_TIME_ACCUM_DIFF(dst
->shared_blk_read_time
,
263 add
->shared_blk_read_time
, sub
->shared_blk_read_time
);
264 INSTR_TIME_ACCUM_DIFF(dst
->shared_blk_write_time
,
265 add
->shared_blk_write_time
, sub
->shared_blk_write_time
);
266 INSTR_TIME_ACCUM_DIFF(dst
->local_blk_read_time
,
267 add
->local_blk_read_time
, sub
->local_blk_read_time
);
268 INSTR_TIME_ACCUM_DIFF(dst
->local_blk_write_time
,
269 add
->local_blk_write_time
, sub
->local_blk_write_time
);
270 INSTR_TIME_ACCUM_DIFF(dst
->temp_blk_read_time
,
271 add
->temp_blk_read_time
, sub
->temp_blk_read_time
);
272 INSTR_TIME_ACCUM_DIFF(dst
->temp_blk_write_time
,
273 add
->temp_blk_write_time
, sub
->temp_blk_write_time
);
276 /* helper functions for WAL usage accumulation */
278 WalUsageAdd(WalUsage
*dst
, WalUsage
*add
)
280 dst
->wal_bytes
+= add
->wal_bytes
;
281 dst
->wal_records
+= add
->wal_records
;
282 dst
->wal_fpi
+= add
->wal_fpi
;
286 WalUsageAccumDiff(WalUsage
*dst
, const WalUsage
*add
, const WalUsage
*sub
)
288 dst
->wal_bytes
+= add
->wal_bytes
- sub
->wal_bytes
;
289 dst
->wal_records
+= add
->wal_records
- sub
->wal_records
;
290 dst
->wal_fpi
+= add
->wal_fpi
- sub
->wal_fpi
;