7 * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp>
9 * Permission to use, copy, modify, and distribute this software and
10 * its documentation for any purpose, without fee, and without a
11 * written agreement is hereby granted, provided that the above
12 * copyright notice and this paragraph and the following two
13 * paragraphs appear in all copies.
15 * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
16 * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
17 * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
18 * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
19 * OF THE POSSIBILITY OF SUCH DAMAGE.
21 * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
24 * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
25 * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
30 #include "access/heapam.h"
31 #include "access/nbtree.h"
32 #include "catalog/namespace.h"
33 #include "catalog/pg_type.h"
35 #include "miscadmin.h"
36 #include "storage/bufmgr.h"
37 #include "utils/builtins.h"
40 extern Datum
bt_metap(PG_FUNCTION_ARGS
);
41 extern Datum
bt_page_items(PG_FUNCTION_ARGS
);
42 extern Datum
bt_page_stats(PG_FUNCTION_ARGS
);
44 PG_FUNCTION_INFO_V1(bt_metap
);
45 PG_FUNCTION_INFO_V1(bt_page_items
);
46 PG_FUNCTION_INFO_V1(bt_page_stats
);
/* Nonzero when r->rd_rel->relkind is RELKIND_INDEX. */
#define IS_INDEX(r) (RELKIND_INDEX == (r)->rd_rel->relkind)

/* Nonzero when r->rd_rel->relam is BTREE_AM_OID. */
#define IS_BTREE(r) (BTREE_AM_OID == (r)->rd_rel->relam)
/*
 * CHECK_PAGE_OFFSET_RANGE(pg, offnum)
 *
 * Report ERROR "page offset number out of range" unless
 * FirstOffsetNumber <= offnum <= PageGetMaxOffsetNumber(pg).
 *
 * The body is wrapped in do { ... } while (0) so that the expansion together
 * with the caller's trailing semicolon is exactly one statement.  A bare
 * { ... } block followed by ';' would terminate an enclosing unbraced "if"
 * and make a following "else" a syntax error.
 *
 * Note: offnum is evaluated twice; do not pass an expression with side
 * effects.
 */
#define CHECK_PAGE_OFFSET_RANGE(pg, offnum) \
	do { \
		if (!(FirstOffsetNumber <= (offnum) && \
			  (offnum) <= PageGetMaxOffsetNumber(pg))) \
			elog(ERROR, "page offset number out of range"); \
	} while (0)
/*
 * CHECK_RELATION_BLOCK_RANGE(rel, blkno)
 *
 * Report ERROR "block number out of range" when blkno is not smaller than
 * RelationGetNumberOfBlocks(rel).
 *
 * note: BlockNumber is unsigned, hence can't be negative, so only the upper
 * bound needs checking.
 *
 * The body is wrapped in do { ... } while (0) so that the expansion together
 * with the caller's trailing semicolon is exactly one statement and stays
 * safe inside an unbraced if/else.
 */
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) \
	do { \
		if (RelationGetNumberOfBlocks(rel) <= (BlockNumber) (blkno)) \
			elog(ERROR, "block number out of range"); \
	} while (0)
61 /* ------------------------------------------------
62 * structure for single btree page statistics
63 * ------------------------------------------------
65 typedef struct BTPageStat
77 BlockNumber btpo_prev
;
78 BlockNumber btpo_next
;
85 BTCycleId btpo_cycleid
;
89 /* -------------------------------------------------
90 * GetBTPageStatistics()
92 * Collect statistics of single b-tree page
93 * -------------------------------------------------
96 GetBTPageStatistics(BlockNumber blkno
, Buffer buffer
, BTPageStat
*stat
)
98 Page page
= BufferGetPage(buffer
);
99 PageHeader phdr
= (PageHeader
) page
;
100 OffsetNumber maxoff
= PageGetMaxOffsetNumber(page
);
101 BTPageOpaque opaque
= (BTPageOpaque
) PageGetSpecialPointer(page
);
107 stat
->max_avail
= BLCKSZ
- (BLCKSZ
- phdr
->pd_special
+ SizeOfPageHeaderData
);
109 stat
->dead_items
= stat
->live_items
= 0;
111 stat
->page_size
= PageGetPageSize(page
);
113 /* page type (flags) */
114 if (P_ISDELETED(opaque
))
117 stat
->btpo
.xact
= opaque
->btpo
.xact
;
120 else if (P_IGNORE(opaque
))
122 else if (P_ISLEAF(opaque
))
124 else if (P_ISROOT(opaque
))
129 /* btpage opaque data */
130 stat
->btpo_prev
= opaque
->btpo_prev
;
131 stat
->btpo_next
= opaque
->btpo_next
;
132 stat
->btpo
.level
= opaque
->btpo
.level
;
133 stat
->btpo_flags
= opaque
->btpo_flags
;
134 stat
->btpo_cycleid
= opaque
->btpo_cycleid
;
136 /* count live and dead tuples, and free space */
137 for (off
= FirstOffsetNumber
; off
<= maxoff
; off
++)
141 ItemId id
= PageGetItemId(page
, off
);
143 itup
= (IndexTuple
) PageGetItem(page
, id
);
145 item_size
+= IndexTupleSize(itup
);
147 if (!ItemIdIsDead(id
))
152 stat
->free_size
= PageGetFreeSpace(page
);
154 if ((stat
->live_items
+ stat
->dead_items
) > 0)
155 stat
->avg_item_size
= item_size
/ (stat
->live_items
+ stat
->dead_items
);
157 stat
->avg_item_size
= 0;
160 /* -----------------------------------------------
163 * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
164 * -----------------------------------------------
167 bt_page_stats(PG_FUNCTION_ARGS
)
169 text
*relname
= PG_GETARG_TEXT_P(0);
170 uint32 blkno
= PG_GETARG_UINT32(1);
183 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE
),
184 (errmsg("must be superuser to use pageinspect functions"))));
186 relrv
= makeRangeVarFromNameList(textToQualifiedNameList(relname
));
187 rel
= relation_openrv(relrv
, AccessShareLock
);
189 if (!IS_INDEX(rel
) || !IS_BTREE(rel
))
190 elog(ERROR
, "relation \"%s\" is not a btree index",
191 RelationGetRelationName(rel
));
194 * Reject attempts to read non-local temporary relations; we would be
195 * likely to get wrong data since we have no visibility into the owning
196 * session's local buffers.
198 if (RELATION_IS_OTHER_TEMP(rel
))
200 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
201 errmsg("cannot access temporary tables of other sessions")));
204 elog(ERROR
, "block 0 is a meta page");
206 CHECK_RELATION_BLOCK_RANGE(rel
, blkno
);
208 buffer
= ReadBuffer(rel
, blkno
);
210 /* keep compiler quiet */
211 stat
.btpo_prev
= stat
.btpo_next
= InvalidBlockNumber
;
212 stat
.btpo_flags
= stat
.free_size
= stat
.avg_item_size
= 0;
214 GetBTPageStatistics(blkno
, buffer
, &stat
);
216 /* Build a tuple descriptor for our result type */
217 if (get_call_result_type(fcinfo
, NULL
, &tupleDesc
) != TYPEFUNC_COMPOSITE
)
218 elog(ERROR
, "return type must be a row type");
221 values
[j
] = palloc(32);
222 snprintf(values
[j
++], 32, "%d", stat
.blkno
);
223 values
[j
] = palloc(32);
224 snprintf(values
[j
++], 32, "%c", stat
.type
);
225 values
[j
] = palloc(32);
226 snprintf(values
[j
++], 32, "%d", stat
.live_items
);
227 values
[j
] = palloc(32);
228 snprintf(values
[j
++], 32, "%d", stat
.dead_items
);
229 values
[j
] = palloc(32);
230 snprintf(values
[j
++], 32, "%d", stat
.avg_item_size
);
231 values
[j
] = palloc(32);
232 snprintf(values
[j
++], 32, "%d", stat
.page_size
);
233 values
[j
] = palloc(32);
234 snprintf(values
[j
++], 32, "%d", stat
.free_size
);
235 values
[j
] = palloc(32);
236 snprintf(values
[j
++], 32, "%d", stat
.btpo_prev
);
237 values
[j
] = palloc(32);
238 snprintf(values
[j
++], 32, "%d", stat
.btpo_next
);
239 values
[j
] = palloc(32);
240 if (stat
.type
== 'd')
241 snprintf(values
[j
++], 32, "%d", stat
.btpo
.xact
);
243 snprintf(values
[j
++], 32, "%d", stat
.btpo
.level
);
244 values
[j
] = palloc(32);
245 snprintf(values
[j
++], 32, "%d", stat
.btpo_flags
);
247 tuple
= BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc
),
250 result
= HeapTupleGetDatum(tuple
);
252 ReleaseBuffer(buffer
);
254 relation_close(rel
, AccessShareLock
);
256 PG_RETURN_DATUM(result
);
259 /*-------------------------------------------------------
262 * Get IndexTupleData set in a btree page
264 * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
265 *-------------------------------------------------------
269 * cross-call data structure for SRF
278 bt_page_items(PG_FUNCTION_ARGS
)
280 text
*relname
= PG_GETARG_TEXT_P(0);
281 uint32 blkno
= PG_GETARG_UINT32(1);
285 FuncCallContext
*fctx
;
287 struct user_args
*uargs
;
291 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE
),
292 (errmsg("must be superuser to use pageinspect functions"))));
294 if (SRF_IS_FIRSTCALL())
302 fctx
= SRF_FIRSTCALL_INIT();
304 relrv
= makeRangeVarFromNameList(textToQualifiedNameList(relname
));
305 rel
= relation_openrv(relrv
, AccessShareLock
);
307 if (!IS_INDEX(rel
) || !IS_BTREE(rel
))
308 elog(ERROR
, "relation \"%s\" is not a btree index",
309 RelationGetRelationName(rel
));
312 * Reject attempts to read non-local temporary relations; we would be
313 * likely to get wrong data since we have no visibility into the
314 * owning session's local buffers.
316 if (RELATION_IS_OTHER_TEMP(rel
))
318 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
319 errmsg("cannot access temporary tables of other sessions")));
322 elog(ERROR
, "block 0 is a meta page");
324 CHECK_RELATION_BLOCK_RANGE(rel
, blkno
);
326 buffer
= ReadBuffer(rel
, blkno
);
329 * We copy the page into local storage to avoid holding pin on the
330 * buffer longer than we must, and possibly failing to release it at
331 * all if the calling query doesn't fetch all rows.
333 mctx
= MemoryContextSwitchTo(fctx
->multi_call_memory_ctx
);
335 uargs
= palloc(sizeof(struct user_args
));
337 uargs
->page
= palloc(BLCKSZ
);
338 memcpy(uargs
->page
, BufferGetPage(buffer
), BLCKSZ
);
340 ReleaseBuffer(buffer
);
341 relation_close(rel
, AccessShareLock
);
343 uargs
->offset
= FirstOffsetNumber
;
345 opaque
= (BTPageOpaque
) PageGetSpecialPointer(uargs
->page
);
347 if (P_ISDELETED(opaque
))
348 elog(NOTICE
, "page is deleted");
350 fctx
->max_calls
= PageGetMaxOffsetNumber(uargs
->page
);
352 /* Build a tuple descriptor for our result type */
353 if (get_call_result_type(fcinfo
, NULL
, &tupleDesc
) != TYPEFUNC_COMPOSITE
)
354 elog(ERROR
, "return type must be a row type");
356 fctx
->attinmeta
= TupleDescGetAttInMetadata(tupleDesc
);
358 fctx
->user_fctx
= uargs
;
360 MemoryContextSwitchTo(mctx
);
363 fctx
= SRF_PERCALL_SETUP();
364 uargs
= fctx
->user_fctx
;
366 if (fctx
->call_cntr
< fctx
->max_calls
)
376 id
= PageGetItemId(uargs
->page
, uargs
->offset
);
378 if (!ItemIdIsValid(id
))
379 elog(ERROR
, "invalid ItemId");
381 itup
= (IndexTuple
) PageGetItem(uargs
->page
, id
);
384 values
[j
] = palloc(32);
385 snprintf(values
[j
++], 32, "%d", uargs
->offset
);
386 values
[j
] = palloc(32);
387 snprintf(values
[j
++], 32, "(%u,%u)",
388 BlockIdGetBlockNumber(&(itup
->t_tid
.ip_blkid
)),
389 itup
->t_tid
.ip_posid
);
390 values
[j
] = palloc(32);
391 snprintf(values
[j
++], 32, "%d", (int) IndexTupleSize(itup
));
392 values
[j
] = palloc(32);
393 snprintf(values
[j
++], 32, "%c", IndexTupleHasNulls(itup
) ? 't' : 'f');
394 values
[j
] = palloc(32);
395 snprintf(values
[j
++], 32, "%c", IndexTupleHasVarwidths(itup
) ? 't' : 'f');
397 ptr
= (char *) itup
+ IndexInfoFindDataOffset(itup
->t_info
);
398 dlen
= IndexTupleSize(itup
) - IndexInfoFindDataOffset(itup
->t_info
);
399 dump
= palloc0(dlen
* 3 + 1);
401 for (off
= 0; off
< dlen
; off
++)
405 sprintf(dump
, "%02x", *(ptr
+ off
) & 0xff);
409 tuple
= BuildTupleFromCStrings(fctx
->attinmeta
, values
);
410 result
= HeapTupleGetDatum(tuple
);
412 uargs
->offset
= uargs
->offset
+ 1;
414 SRF_RETURN_NEXT(fctx
, result
);
420 SRF_RETURN_DONE(fctx
);
425 /* ------------------------------------------------
428 * Get a btree's meta-page information
430 * Usage: SELECT * FROM bt_metap('t1_pkey')
431 * ------------------------------------------------
434 bt_metap(PG_FUNCTION_ARGS
)
436 text
*relname
= PG_GETARG_TEXT_P(0);
440 BTMetaPageData
*metad
;
450 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE
),
451 (errmsg("must be superuser to use pageinspect functions"))));
453 relrv
= makeRangeVarFromNameList(textToQualifiedNameList(relname
));
454 rel
= relation_openrv(relrv
, AccessShareLock
);
456 if (!IS_INDEX(rel
) || !IS_BTREE(rel
))
457 elog(ERROR
, "relation \"%s\" is not a btree index",
458 RelationGetRelationName(rel
));
461 * Reject attempts to read non-local temporary relations; we would be
462 * likely to get wrong data since we have no visibility into the owning
463 * session's local buffers.
465 if (RELATION_IS_OTHER_TEMP(rel
))
467 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
468 errmsg("cannot access temporary tables of other sessions")));
470 buffer
= ReadBuffer(rel
, 0);
471 page
= BufferGetPage(buffer
);
472 metad
= BTPageGetMeta(page
);
474 /* Build a tuple descriptor for our result type */
475 if (get_call_result_type(fcinfo
, NULL
, &tupleDesc
) != TYPEFUNC_COMPOSITE
)
476 elog(ERROR
, "return type must be a row type");
479 values
[j
] = palloc(32);
480 snprintf(values
[j
++], 32, "%d", metad
->btm_magic
);
481 values
[j
] = palloc(32);
482 snprintf(values
[j
++], 32, "%d", metad
->btm_version
);
483 values
[j
] = palloc(32);
484 snprintf(values
[j
++], 32, "%d", metad
->btm_root
);
485 values
[j
] = palloc(32);
486 snprintf(values
[j
++], 32, "%d", metad
->btm_level
);
487 values
[j
] = palloc(32);
488 snprintf(values
[j
++], 32, "%d", metad
->btm_fastroot
);
489 values
[j
] = palloc(32);
490 snprintf(values
[j
++], 32, "%d", metad
->btm_fastlevel
);
492 tuple
= BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc
),
495 result
= HeapTupleGetDatum(tuple
);
497 ReleaseBuffer(buffer
);
499 relation_close(rel
, AccessShareLock
);
501 PG_RETURN_DATUM(result
);