1 /*-------------------------------------------------------------------------
4 * header file for postgres hash access method implementation
7 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
13 * modeled after Margo Seltzer's hash implementation for unix.
15 *-------------------------------------------------------------------------
20 #include "access/genam.h"
21 #include "access/itup.h"
22 #include "access/sdir.h"
23 #include "access/xlog.h"
25 #include "storage/lock.h"
26 #include "utils/relcache.h"
29 * Mapping from hash bucket number to physical block number of bucket's
30 * starting page. Beware of multiple evaluations of argument!
32 typedef uint32 Bucket
;
34 #define BUCKET_TO_BLKNO(metap,B) \
35 ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_log2((B)+1)-1] : 0)) + 1)
38 * Special space for hash index pages.
40 * hasho_flag tells us which type of page we're looking at. For
41 * example, knowing overflow pages from bucket pages is necessary
42 * information when you're deleting tuples from a page. If all the
43 * tuples are deleted from an overflow page, the overflow is made
44 * available to other buckets by calling _hash_freeovflpage(). If all
45 * the tuples are deleted from a bucket page, no additional action is
48 #define LH_UNUSED_PAGE (0)
49 #define LH_OVERFLOW_PAGE (1 << 0)
50 #define LH_BUCKET_PAGE (1 << 1)
51 #define LH_BITMAP_PAGE (1 << 2)
52 #define LH_META_PAGE (1 << 3)
54 typedef struct HashPageOpaqueData
56 BlockNumber hasho_prevblkno
; /* previous ovfl (or bucket) blkno */
57 BlockNumber hasho_nextblkno
; /* next ovfl blkno */
58 Bucket hasho_bucket
; /* bucket number this pg belongs to */
59 uint16 hasho_flag
; /* page type code, see above */
60 uint16 hasho_page_id
; /* for identification of hash indexes */
63 typedef HashPageOpaqueData
*HashPageOpaque
;
66 * The page ID is for the convenience of pg_filedump and similar utilities,
67 * which otherwise would have a hard time telling pages of different index
68 * types apart. It should be the last 2 bytes on the page. This is more or
69 * less "free" due to alignment considerations.
71 #define HASHO_PAGE_ID 0xFF80
74 * HashScanOpaqueData is private state for a hash index scan.
76 typedef struct HashScanOpaqueData
78 /* Hash value of the scan key, ie, the hash key we seek */
79 uint32 hashso_sk_hash
;
82 * By definition, a hash scan should be examining only one bucket. We
83 * record the bucket number here as soon as it is known.
86 bool hashso_bucket_valid
;
89 * If we have a share lock on the bucket, we record it here. When
90 * hashso_bucket_blkno is zero, we have no such lock.
92 BlockNumber hashso_bucket_blkno
;
95 * We also want to remember which buffer we're currently examining in the
96 * scan. We keep the buffer pinned (but not locked) across hashgettuple
97 * calls, in order to avoid doing a ReadBuffer() for every tuple in the
100 Buffer hashso_curbuf
;
102 /* Current position of the scan */
103 ItemPointerData hashso_curpos
;
104 } HashScanOpaqueData
;
106 typedef HashScanOpaqueData
*HashScanOpaque
;
109 * Definitions for metapage.
112 #define HASH_METAPAGE 0 /* metapage is always block 0 */
114 #define HASH_MAGIC 0x6440640
115 #define HASH_VERSION 2 /* 2 signifies only hash key value is stored */
118 * Spares[] holds the number of overflow pages currently allocated at or
119 * before a certain splitpoint. For example, if spares[3] = 7 then there are
120 * 7 ovflpages before splitpoint 3 (compare BUCKET_TO_BLKNO macro). The
121 * value in spares[ovflpoint] increases as overflow pages are added at the
122 * end of the index. Once ovflpoint increases (ie, we have actually allocated
123 * the bucket pages belonging to that splitpoint) the number of spares at the
124 * prior splitpoint cannot change anymore.
126 * ovflpages that have been recycled for reuse can be found by looking at
127 * bitmaps that are stored within ovflpages dedicated for the purpose.
128 * The blknos of these bitmap pages are kept in bitmaps[]; nmaps is the
129 * number of currently existing bitmaps.
131 * The limitation on the size of spares[] comes from the fact that there's
132 * no point in having more than 2^32 buckets with only uint32 hashcodes.
133 * There is no particular upper limit on the size of mapp[], other than
134 * needing to fit into the metapage. (With 8K block size, 128 bitmaps
135 * limit us to 64 Gb of overflow space...)
137 #define HASH_MAX_SPLITPOINTS 32
138 #define HASH_MAX_BITMAPS 128
140 typedef struct HashMetaPageData
142 uint32 hashm_magic
; /* magic no. for hash tables */
143 uint32 hashm_version
; /* version ID */
144 double hashm_ntuples
; /* number of tuples stored in the table */
145 uint16 hashm_ffactor
; /* target fill factor (tuples/bucket) */
146 uint16 hashm_bsize
; /* index page size (bytes) */
147 uint16 hashm_bmsize
; /* bitmap array size (bytes) - must be a power
149 uint16 hashm_bmshift
; /* log2(bitmap array size in BITS) */
150 uint32 hashm_maxbucket
; /* ID of maximum bucket in use */
151 uint32 hashm_highmask
; /* mask to modulo into entire table */
152 uint32 hashm_lowmask
; /* mask to modulo into lower half of table */
153 uint32 hashm_ovflpoint
;/* splitpoint from which ovflpgs being
155 uint32 hashm_firstfree
; /* lowest-number free ovflpage (bit#) */
156 uint32 hashm_nmaps
; /* number of bitmap pages */
157 RegProcedure hashm_procid
; /* hash procedure id from pg_proc */
158 uint32 hashm_spares
[HASH_MAX_SPLITPOINTS
]; /* spare pages before
160 BlockNumber hashm_mapp
[HASH_MAX_BITMAPS
]; /* blknos of ovfl bitmaps */
163 typedef HashMetaPageData
*HashMetaPage
;
166 * Maximum size of a hash index item (it's okay to have only one per page)
168 #define HashMaxItemSize(page) \
169 MAXALIGN_DOWN(PageGetPageSize(page) - \
170 SizeOfPageHeaderData - \
171 sizeof(ItemIdData) - \
172 MAXALIGN(sizeof(HashPageOpaqueData)))
174 #define HASH_MIN_FILLFACTOR 10
175 #define HASH_DEFAULT_FILLFACTOR 75
180 #define BYTE_TO_BIT 3 /* 2^3 bits/byte */
181 #define ALL_SET ((uint32) ~0)
184 * Bitmap pages do not contain tuples. They do contain the standard
185 * page headers and trailers; however, everything in between is a
186 * giant bit array. The number of bits that fit on a page obviously
187 * depends on the page size and the header/trailer overhead. We require
188 * the number of bits per page to be a power of 2.
190 #define BMPGSZ_BYTE(metap) ((metap)->hashm_bmsize)
191 #define BMPGSZ_BIT(metap) ((metap)->hashm_bmsize << BYTE_TO_BIT)
192 #define BMPG_SHIFT(metap) ((metap)->hashm_bmshift)
193 #define BMPG_MASK(metap) (BMPGSZ_BIT(metap) - 1)
195 #define HashPageGetBitmap(page) \
196 ((uint32 *) PageGetContents(page))
198 #define HashGetMaxBitmapSize(page) \
199 (PageGetPageSize((Page) page) - \
200 (MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(HashPageOpaqueData))))
202 #define HashPageGetMeta(page) \
203 ((HashMetaPage) PageGetContents(page))
206 * The number of bits in an ovflpage bitmap word.
208 #define BITS_PER_MAP 32 /* Number of bits in uint32 */
210 /* Given the address of the beginning of a bit map, clear/set the nth bit */
211 #define CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))
212 #define SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))
213 #define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))
216 * page-level and high-level locking modes (see README)
218 #define HASH_READ BUFFER_LOCK_SHARE
219 #define HASH_WRITE BUFFER_LOCK_EXCLUSIVE
220 #define HASH_NOLOCK (-1)
222 #define HASH_SHARE ShareLock
223 #define HASH_EXCLUSIVE ExclusiveLock
226 * Strategy number. There's only one valid strategy for hashing: equality.
228 #define HTEqualStrategyNumber 1
229 #define HTMaxStrategyNumber 1
232 * When a new operator class is declared, we require that the user supply
233 * us with an amproc procudure for hashing a key of the new type.
234 * Since we only have one such proc in amproc, it's number 1.
239 /* public routines */
241 extern Datum
hashbuild(PG_FUNCTION_ARGS
);
242 extern Datum
hashinsert(PG_FUNCTION_ARGS
);
243 extern Datum
hashbeginscan(PG_FUNCTION_ARGS
);
244 extern Datum
hashgettuple(PG_FUNCTION_ARGS
);
245 extern Datum
hashgetbitmap(PG_FUNCTION_ARGS
);
246 extern Datum
hashrescan(PG_FUNCTION_ARGS
);
247 extern Datum
hashendscan(PG_FUNCTION_ARGS
);
248 extern Datum
hashmarkpos(PG_FUNCTION_ARGS
);
249 extern Datum
hashrestrpos(PG_FUNCTION_ARGS
);
250 extern Datum
hashbulkdelete(PG_FUNCTION_ARGS
);
251 extern Datum
hashvacuumcleanup(PG_FUNCTION_ARGS
);
252 extern Datum
hashoptions(PG_FUNCTION_ARGS
);
255 * Datatype-specific hash functions in hashfunc.c.
257 * These support both hash indexes and hash joins.
259 * NOTE: some of these are also used by catcache operations, without
260 * any direct connection to hash indexes. Also, the common hash_any
261 * routine is also used by dynahash tables.
263 extern Datum
hashchar(PG_FUNCTION_ARGS
);
264 extern Datum
hashint2(PG_FUNCTION_ARGS
);
265 extern Datum
hashint4(PG_FUNCTION_ARGS
);
266 extern Datum
hashint8(PG_FUNCTION_ARGS
);
267 extern Datum
hashoid(PG_FUNCTION_ARGS
);
268 extern Datum
hashenum(PG_FUNCTION_ARGS
);
269 extern Datum
hashfloat4(PG_FUNCTION_ARGS
);
270 extern Datum
hashfloat8(PG_FUNCTION_ARGS
);
271 extern Datum
hashoidvector(PG_FUNCTION_ARGS
);
272 extern Datum
hashint2vector(PG_FUNCTION_ARGS
);
273 extern Datum
hashname(PG_FUNCTION_ARGS
);
274 extern Datum
hashtext(PG_FUNCTION_ARGS
);
275 extern Datum
hashvarlena(PG_FUNCTION_ARGS
);
276 extern Datum
hash_any(register const unsigned char *k
, register int keylen
);
277 extern Datum
hash_uint32(uint32 k
);
279 /* private routines */
282 extern void _hash_doinsert(Relation rel
, IndexTuple itup
);
285 extern Buffer
_hash_addovflpage(Relation rel
, Buffer metabuf
, Buffer buf
);
286 extern BlockNumber
_hash_freeovflpage(Relation rel
, Buffer ovflbuf
,
287 BufferAccessStrategy bstrategy
);
288 extern void _hash_initbitmap(Relation rel
, HashMetaPage metap
,
290 extern void _hash_squeezebucket(Relation rel
,
291 Bucket bucket
, BlockNumber bucket_blkno
,
292 BufferAccessStrategy bstrategy
);
295 extern void _hash_getlock(Relation rel
, BlockNumber whichlock
, int access
);
296 extern bool _hash_try_getlock(Relation rel
, BlockNumber whichlock
, int access
);
297 extern void _hash_droplock(Relation rel
, BlockNumber whichlock
, int access
);
298 extern Buffer
_hash_getbuf(Relation rel
, BlockNumber blkno
,
299 int access
, int flags
);
300 extern Buffer
_hash_getinitbuf(Relation rel
, BlockNumber blkno
);
301 extern Buffer
_hash_getnewbuf(Relation rel
, BlockNumber blkno
);
302 extern Buffer
_hash_getbuf_with_strategy(Relation rel
, BlockNumber blkno
,
303 int access
, int flags
,
304 BufferAccessStrategy bstrategy
);
305 extern void _hash_relbuf(Relation rel
, Buffer buf
);
306 extern void _hash_dropbuf(Relation rel
, Buffer buf
);
307 extern void _hash_wrtbuf(Relation rel
, Buffer buf
);
308 extern void _hash_chgbufaccess(Relation rel
, Buffer buf
, int from_access
,
310 extern uint32
_hash_metapinit(Relation rel
, double num_tuples
);
311 extern void _hash_pageinit(Page page
, Size size
);
312 extern void _hash_expandtable(Relation rel
, Buffer metabuf
);
315 extern void _hash_regscan(IndexScanDesc scan
);
316 extern void _hash_dropscan(IndexScanDesc scan
);
317 extern bool _hash_has_active_scan(Relation rel
, Bucket bucket
);
318 extern void ReleaseResources_hash(void);
321 extern bool _hash_next(IndexScanDesc scan
, ScanDirection dir
);
322 extern bool _hash_first(IndexScanDesc scan
, ScanDirection dir
);
323 extern bool _hash_step(IndexScanDesc scan
, Buffer
*bufP
, ScanDirection dir
);
326 typedef struct HSpool HSpool
; /* opaque struct in hashsort.c */
328 extern HSpool
*_h_spoolinit(Relation index
, uint32 num_buckets
);
329 extern void _h_spooldestroy(HSpool
*hspool
);
330 extern void _h_spool(IndexTuple itup
, HSpool
*hspool
);
331 extern void _h_indexbuild(HSpool
*hspool
);
334 extern bool _hash_checkqual(IndexScanDesc scan
, IndexTuple itup
);
335 extern uint32
_hash_datum2hashkey(Relation rel
, Datum key
);
336 extern uint32
_hash_datum2hashkey_type(Relation rel
, Datum key
, Oid keytype
);
337 extern Bucket
_hash_hashkey2bucket(uint32 hashkey
, uint32 maxbucket
,
338 uint32 highmask
, uint32 lowmask
);
339 extern uint32
_hash_log2(uint32 num
);
340 extern void _hash_checkpage(Relation rel
, Buffer buf
, int flags
);
341 extern uint32
_hash_get_indextuple_hashkey(IndexTuple itup
);
342 extern IndexTuple
_hash_form_tuple(Relation index
,
343 Datum
*values
, bool *isnull
);
344 extern OffsetNumber
_hash_binsearch(Page page
, uint32 hash_value
);
345 extern OffsetNumber
_hash_binsearch_last(Page page
, uint32 hash_value
);
348 extern void hash_redo(XLogRecPtr lsn
, XLogRecord
*record
);
349 extern void hash_desc(StringInfo buf
, uint8 xl_info
, char *rec
);