1 /*-------------------------------------------------------------------------
4 * PostgreSQL generic bitmap set package
6 * A bitmap set can represent any set of nonnegative integers, although
7 * it is mainly intended for sets where the maximum value is not large,
8 * say at most a few hundred. By convention, a NULL pointer is always
9 * accepted by all operations to represent the empty set. (But beware
10 * that this is not the only representation of the empty set. Use
11 * bms_is_empty() in preference to testing for NULL.)
14 * Copyright (c) 2003-2009, PostgreSQL Global Development Group
19 *-------------------------------------------------------------------------
23 #include "nodes/bitmapset.h"
24 #include "access/hash.h"
27 #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
28 #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
30 #define BITMAPSET_SIZE(nwords) \
31 (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
34 * This is a well-known cute trick for isolating the rightmost one-bit
35 * in a word. It assumes two's complement arithmetic. Consider any
36 * nonzero value, and focus attention on the rightmost one. The value is
39 * where x's are unspecified bits. The two's complement negative is formed
40 * by inverting all the bits and adding one. Inversion gives
42 * where each y is the inverse of the corresponding x. Incrementing gives
44 * and then ANDing with the original value gives
46 * This works for all cases except original value = zero, where of course
50 #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x)))
52 #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
56 * Lookup tables to avoid need for bit-by-bit groveling
58 * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit
59 * in a nonzero byte value x. The entry for x=0 is never used.
61 * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x.
63 * We could make these tables larger and reduce the number of iterations
64 * in the functions that use them, but bytewise shifts and masks are
65 * especially fast on many machines, so working a byte at a time seems best.
68 static const uint8 rightmost_one_pos
[256] = {
69 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
79 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
80 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
81 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
82 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
83 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
84 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
87 static const uint8 number_of_ones
[256] = {
88 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
108 * bms_copy - make a palloc'd copy of a bitmapset
111 bms_copy(const Bitmapset
*a
)
118 size
= BITMAPSET_SIZE(a
->nwords
);
119 result
= (Bitmapset
*) palloc(size
);
120 memcpy(result
, a
, size
);
125 * bms_equal - are two bitmapsets equal?
127 * This is logical not physical equality; in particular, a NULL pointer will
128 * be reported as equal to a palloc'd value containing no members.
131 bms_equal(const Bitmapset
*a
, const Bitmapset
*b
)
133 const Bitmapset
*shorter
;
134 const Bitmapset
*longer
;
139 /* Handle cases where either input is NULL */
144 return bms_is_empty(b
);
147 return bms_is_empty(a
);
148 /* Identify shorter and longer input */
149 if (a
->nwords
<= b
->nwords
)
160 shortlen
= shorter
->nwords
;
161 for (i
= 0; i
< shortlen
; i
++)
163 if (shorter
->words
[i
] != longer
->words
[i
])
166 longlen
= longer
->nwords
;
167 for (; i
< longlen
; i
++)
169 if (longer
->words
[i
] != 0)
176 * bms_make_singleton - build a bitmapset containing a single member
179 bms_make_singleton(int x
)
186 elog(ERROR
, "negative bitmapset member not allowed");
187 wordnum
= WORDNUM(x
);
189 result
= (Bitmapset
*) palloc0(BITMAPSET_SIZE(wordnum
+ 1));
190 result
->nwords
= wordnum
+ 1;
191 result
->words
[wordnum
] = ((bitmapword
) 1 << bitnum
);
196 * bms_free - free a bitmapset
198 * Same as pfree except for allowing NULL input
201 bms_free(Bitmapset
*a
)
209 * These operations all make a freshly palloc'd result,
210 * leaving their inputs untouched
215 * bms_union - set union
218 bms_union(const Bitmapset
*a
, const Bitmapset
*b
)
221 const Bitmapset
*other
;
225 /* Handle cases where either input is NULL */
230 /* Identify shorter and longer input; copy the longer one */
231 if (a
->nwords
<= b
->nwords
)
233 result
= bms_copy(b
);
238 result
= bms_copy(a
);
241 /* And union the shorter input into the result */
242 otherlen
= other
->nwords
;
243 for (i
= 0; i
< otherlen
; i
++)
244 result
->words
[i
] |= other
->words
[i
];
249 * bms_intersect - set intersection
252 bms_intersect(const Bitmapset
*a
, const Bitmapset
*b
)
255 const Bitmapset
*other
;
259 /* Handle cases where either input is NULL */
260 if (a
== NULL
|| b
== NULL
)
262 /* Identify shorter and longer input; copy the shorter one */
263 if (a
->nwords
<= b
->nwords
)
265 result
= bms_copy(a
);
270 result
= bms_copy(b
);
273 /* And intersect the longer input with the result */
274 resultlen
= result
->nwords
;
275 for (i
= 0; i
< resultlen
; i
++)
276 result
->words
[i
] &= other
->words
[i
];
281 * bms_difference - set difference (ie, A without members of B)
284 bms_difference(const Bitmapset
*a
, const Bitmapset
*b
)
290 /* Handle cases where either input is NULL */
295 /* Copy the left input */
296 result
= bms_copy(a
);
297 /* And remove b's bits from result */
298 shortlen
= Min(a
->nwords
, b
->nwords
);
299 for (i
= 0; i
< shortlen
; i
++)
300 result
->words
[i
] &= ~b
->words
[i
];
305 * bms_is_subset - is A a subset of B?
308 bms_is_subset(const Bitmapset
*a
, const Bitmapset
*b
)
314 /* Handle cases where either input is NULL */
316 return true; /* empty set is a subset of anything */
318 return bms_is_empty(a
);
319 /* Check common words */
320 shortlen
= Min(a
->nwords
, b
->nwords
);
321 for (i
= 0; i
< shortlen
; i
++)
323 if ((a
->words
[i
] & ~b
->words
[i
]) != 0)
326 /* Check extra words */
327 if (a
->nwords
> b
->nwords
)
330 for (; i
< longlen
; i
++)
332 if (a
->words
[i
] != 0)
340 * bms_is_member - is X a member of A?
343 bms_is_member(int x
, const Bitmapset
*a
)
348 /* XXX better to just return false for x<0 ? */
350 elog(ERROR
, "negative bitmapset member not allowed");
353 wordnum
= WORDNUM(x
);
355 if (wordnum
>= a
->nwords
)
357 if ((a
->words
[wordnum
] & ((bitmapword
) 1 << bitnum
)) != 0)
363 * bms_overlap - do sets overlap (ie, have a nonempty intersection)?
366 bms_overlap(const Bitmapset
*a
, const Bitmapset
*b
)
371 /* Handle cases where either input is NULL */
372 if (a
== NULL
|| b
== NULL
)
374 /* Check words in common */
375 shortlen
= Min(a
->nwords
, b
->nwords
);
376 for (i
= 0; i
< shortlen
; i
++)
378 if ((a
->words
[i
] & b
->words
[i
]) != 0)
385 * bms_nonempty_difference - do sets have a nonempty difference?
388 bms_nonempty_difference(const Bitmapset
*a
, const Bitmapset
*b
)
393 /* Handle cases where either input is NULL */
397 return !bms_is_empty(a
);
398 /* Check words in common */
399 shortlen
= Min(a
->nwords
, b
->nwords
);
400 for (i
= 0; i
< shortlen
; i
++)
402 if ((a
->words
[i
] & ~b
->words
[i
]) != 0)
405 /* Check extra words in a */
406 for (; i
< a
->nwords
; i
++)
408 if (a
->words
[i
] != 0)
415 * bms_singleton_member - return the sole integer member of set
417 * Raises error if |a| is not 1.
420 bms_singleton_member(const Bitmapset
*a
)
427 elog(ERROR
, "bitmapset is empty");
429 for (wordnum
= 0; wordnum
< nwords
; wordnum
++)
431 bitmapword w
= a
->words
[wordnum
];
435 if (result
>= 0 || HAS_MULTIPLE_ONES(w
))
436 elog(ERROR
, "bitmapset has multiple members");
437 result
= wordnum
* BITS_PER_BITMAPWORD
;
438 while ((w
& 255) == 0)
443 result
+= rightmost_one_pos
[w
& 255];
447 elog(ERROR
, "bitmapset is empty");
452 * bms_num_members - count members of set
455 bms_num_members(const Bitmapset
*a
)
464 for (wordnum
= 0; wordnum
< nwords
; wordnum
++)
466 bitmapword w
= a
->words
[wordnum
];
468 /* we assume here that bitmapword is an unsigned type */
471 result
+= number_of_ones
[w
& 255];
479 * bms_membership - does a set have zero, one, or multiple members?
481 * This is faster than making an exact count with bms_num_members().
484 bms_membership(const Bitmapset
*a
)
486 BMS_Membership result
= BMS_EMPTY_SET
;
491 return BMS_EMPTY_SET
;
493 for (wordnum
= 0; wordnum
< nwords
; wordnum
++)
495 bitmapword w
= a
->words
[wordnum
];
499 if (result
!= BMS_EMPTY_SET
|| HAS_MULTIPLE_ONES(w
))
501 result
= BMS_SINGLETON
;
508 * bms_is_empty - is a set empty?
510 * This is even faster than bms_membership().
513 bms_is_empty(const Bitmapset
*a
)
521 for (wordnum
= 0; wordnum
< nwords
; wordnum
++)
523 bitmapword w
= a
->words
[wordnum
];
533 * These operations all "recycle" their non-const inputs, ie, either
534 * return the modified input or pfree it if it can't hold the result.
536 * These should generally be used in the style
538 * foo = bms_add_member(foo, x);
543 * bms_add_member - add a specified member to set
545 * Input set is modified or recycled!
548 bms_add_member(Bitmapset
*a
, int x
)
554 elog(ERROR
, "negative bitmapset member not allowed");
556 return bms_make_singleton(x
);
557 wordnum
= WORDNUM(x
);
559 if (wordnum
>= a
->nwords
)
561 /* Slow path: make a larger set and union the input set into it */
566 result
= bms_make_singleton(x
);
568 for (i
= 0; i
< nwords
; i
++)
569 result
->words
[i
] |= a
->words
[i
];
573 /* Fast path: x fits in existing set */
574 a
->words
[wordnum
] |= ((bitmapword
) 1 << bitnum
);
579 * bms_del_member - remove a specified member from set
581 * No error if x is not currently a member of set
583 * Input set is modified in-place!
586 bms_del_member(Bitmapset
*a
, int x
)
592 elog(ERROR
, "negative bitmapset member not allowed");
595 wordnum
= WORDNUM(x
);
597 if (wordnum
< a
->nwords
)
598 a
->words
[wordnum
] &= ~((bitmapword
) 1 << bitnum
);
603 * bms_add_members - like bms_union, but left input is recycled
606 bms_add_members(Bitmapset
*a
, const Bitmapset
*b
)
609 const Bitmapset
*other
;
613 /* Handle cases where either input is NULL */
618 /* Identify shorter and longer input; copy the longer one if needed */
619 if (a
->nwords
< b
->nwords
)
621 result
= bms_copy(b
);
629 /* And union the shorter input into the result */
630 otherlen
= other
->nwords
;
631 for (i
= 0; i
< otherlen
; i
++)
632 result
->words
[i
] |= other
->words
[i
];
639 * bms_int_members - like bms_intersect, but left input is recycled
642 bms_int_members(Bitmapset
*a
, const Bitmapset
*b
)
647 /* Handle cases where either input is NULL */
655 /* Intersect b into a; we need never copy */
656 shortlen
= Min(a
->nwords
, b
->nwords
);
657 for (i
= 0; i
< shortlen
; i
++)
658 a
->words
[i
] &= b
->words
[i
];
659 for (; i
< a
->nwords
; i
++)
665 * bms_del_members - like bms_difference, but left input is recycled
668 bms_del_members(Bitmapset
*a
, const Bitmapset
*b
)
673 /* Handle cases where either input is NULL */
678 /* Remove b's bits from a; we need never copy */
679 shortlen
= Min(a
->nwords
, b
->nwords
);
680 for (i
= 0; i
< shortlen
; i
++)
681 a
->words
[i
] &= ~b
->words
[i
];
686 * bms_join - like bms_union, but *both* inputs are recycled
689 bms_join(Bitmapset
*a
, Bitmapset
*b
)
696 /* Handle cases where either input is NULL */
701 /* Identify shorter and longer input; use longer one as result */
702 if (a
->nwords
< b
->nwords
)
712 /* And union the shorter input into the result */
713 otherlen
= other
->nwords
;
714 for (i
= 0; i
< otherlen
; i
++)
715 result
->words
[i
] |= other
->words
[i
];
716 if (other
!= result
) /* pure paranoia */
722 * bms_first_member - find and remove first member of a set
724 * Returns -1 if set is empty. NB: set is destructively modified!
726 * This is intended as support for iterating through the members of a set.
727 * The typical pattern is
729 * tmpset = bms_copy(inputset);
730 * while ((x = bms_first_member(tmpset)) >= 0)
736 bms_first_member(Bitmapset
*a
)
744 for (wordnum
= 0; wordnum
< nwords
; wordnum
++)
746 bitmapword w
= a
->words
[wordnum
];
752 w
= RIGHTMOST_ONE(w
);
753 a
->words
[wordnum
] &= ~w
;
755 result
= wordnum
* BITS_PER_BITMAPWORD
;
756 while ((w
& 255) == 0)
761 result
+= rightmost_one_pos
[w
& 255];
769 * bms_hash_value - compute a hash key for a Bitmapset
771 * Note: we must ensure that any two bitmapsets that are bms_equal() will
772 * hash to the same value; in practice this means that trailing all-zero
773 * words must not affect the result. Hence we strip those before applying
777 bms_hash_value(const Bitmapset
*a
)
782 return 0; /* All empty sets hash to 0 */
783 for (lastword
= a
->nwords
; --lastword
>= 0;)
785 if (a
->words
[lastword
] != 0)
789 return 0; /* All empty sets hash to 0 */
790 return DatumGetUInt32(hash_any((const unsigned char *) a
->words
,
791 (lastword
+ 1) * sizeof(bitmapword
)));