Get rid of the rather fuzzily defined FlattenedSubLink node type in favor of
[PostgreSQL.git] / src / backend / nodes / bitmapset.c
blobfde1071d6e5ee7768a6f3b78a30ae72686f75406
1 /*-------------------------------------------------------------------------
3 * bitmapset.c
4 * PostgreSQL generic bitmap set package
6 * A bitmap set can represent any set of nonnegative integers, although
7 * it is mainly intended for sets where the maximum value is not large,
8 * say at most a few hundred. By convention, a NULL pointer is always
9 * accepted by all operations to represent the empty set. (But beware
10 * that this is not the only representation of the empty set. Use
11 * bms_is_empty() in preference to testing for NULL.)
14 * Copyright (c) 2003-2009, PostgreSQL Global Development Group
16 * IDENTIFICATION
17 * $PostgreSQL$
19 *-------------------------------------------------------------------------
21 #include "postgres.h"
23 #include "nodes/bitmapset.h"
24 #include "access/hash.h"
27 #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
28 #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
30 #define BITMAPSET_SIZE(nwords) \
31 (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
33 /*----------
34 * This is a well-known cute trick for isolating the rightmost one-bit
35 * in a word. It assumes two's complement arithmetic. Consider any
36 * nonzero value, and focus attention on the rightmost one. The value is
37 * then something like
38 * xxxxxx10000
39 * where x's are unspecified bits. The two's complement negative is formed
40 * by inverting all the bits and adding one. Inversion gives
41 * yyyyyy01111
42 * where each y is the inverse of the corresponding x. Incrementing gives
43 * yyyyyy10000
44 * and then ANDing with the original value gives
45 * 00000010000
46 * This works for all cases except original value = zero, where of course
47 * we get zero.
48 *----------
50 #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x)))
52 #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
56 * Lookup tables to avoid need for bit-by-bit groveling
58 * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit
59 * in a nonzero byte value x. The entry for x=0 is never used.
61 * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x.
63 * We could make these tables larger and reduce the number of iterations
64 * in the functions that use them, but bytewise shifts and masks are
65 * especially fast on many machines, so working a byte at a time seems best.
68 static const uint8 rightmost_one_pos[256] = {
69 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
79 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
80 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
81 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
82 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
83 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
84 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
87 static const uint8 number_of_ones[256] = {
88 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
108 * bms_copy - make a palloc'd copy of a bitmapset
110 Bitmapset *
111 bms_copy(const Bitmapset *a)
113 Bitmapset *result;
114 size_t size;
116 if (a == NULL)
117 return NULL;
118 size = BITMAPSET_SIZE(a->nwords);
119 result = (Bitmapset *) palloc(size);
120 memcpy(result, a, size);
121 return result;
125 * bms_equal - are two bitmapsets equal?
127 * This is logical not physical equality; in particular, a NULL pointer will
128 * be reported as equal to a palloc'd value containing no members.
130 bool
131 bms_equal(const Bitmapset *a, const Bitmapset *b)
133 const Bitmapset *shorter;
134 const Bitmapset *longer;
135 int shortlen;
136 int longlen;
137 int i;
139 /* Handle cases where either input is NULL */
140 if (a == NULL)
142 if (b == NULL)
143 return true;
144 return bms_is_empty(b);
146 else if (b == NULL)
147 return bms_is_empty(a);
148 /* Identify shorter and longer input */
149 if (a->nwords <= b->nwords)
151 shorter = a;
152 longer = b;
154 else
156 shorter = b;
157 longer = a;
159 /* And process */
160 shortlen = shorter->nwords;
161 for (i = 0; i < shortlen; i++)
163 if (shorter->words[i] != longer->words[i])
164 return false;
166 longlen = longer->nwords;
167 for (; i < longlen; i++)
169 if (longer->words[i] != 0)
170 return false;
172 return true;
176 * bms_make_singleton - build a bitmapset containing a single member
178 Bitmapset *
179 bms_make_singleton(int x)
181 Bitmapset *result;
182 int wordnum,
183 bitnum;
185 if (x < 0)
186 elog(ERROR, "negative bitmapset member not allowed");
187 wordnum = WORDNUM(x);
188 bitnum = BITNUM(x);
189 result = (Bitmapset *) palloc0(BITMAPSET_SIZE(wordnum + 1));
190 result->nwords = wordnum + 1;
191 result->words[wordnum] = ((bitmapword) 1 << bitnum);
192 return result;
196 * bms_free - free a bitmapset
198 * Same as pfree except for allowing NULL input
200 void
201 bms_free(Bitmapset *a)
203 if (a)
204 pfree(a);
209 * These operations all make a freshly palloc'd result,
210 * leaving their inputs untouched
215 * bms_union - set union
217 Bitmapset *
218 bms_union(const Bitmapset *a, const Bitmapset *b)
220 Bitmapset *result;
221 const Bitmapset *other;
222 int otherlen;
223 int i;
225 /* Handle cases where either input is NULL */
226 if (a == NULL)
227 return bms_copy(b);
228 if (b == NULL)
229 return bms_copy(a);
230 /* Identify shorter and longer input; copy the longer one */
231 if (a->nwords <= b->nwords)
233 result = bms_copy(b);
234 other = a;
236 else
238 result = bms_copy(a);
239 other = b;
241 /* And union the shorter input into the result */
242 otherlen = other->nwords;
243 for (i = 0; i < otherlen; i++)
244 result->words[i] |= other->words[i];
245 return result;
249 * bms_intersect - set intersection
251 Bitmapset *
252 bms_intersect(const Bitmapset *a, const Bitmapset *b)
254 Bitmapset *result;
255 const Bitmapset *other;
256 int resultlen;
257 int i;
259 /* Handle cases where either input is NULL */
260 if (a == NULL || b == NULL)
261 return NULL;
262 /* Identify shorter and longer input; copy the shorter one */
263 if (a->nwords <= b->nwords)
265 result = bms_copy(a);
266 other = b;
268 else
270 result = bms_copy(b);
271 other = a;
273 /* And intersect the longer input with the result */
274 resultlen = result->nwords;
275 for (i = 0; i < resultlen; i++)
276 result->words[i] &= other->words[i];
277 return result;
281 * bms_difference - set difference (ie, A without members of B)
283 Bitmapset *
284 bms_difference(const Bitmapset *a, const Bitmapset *b)
286 Bitmapset *result;
287 int shortlen;
288 int i;
290 /* Handle cases where either input is NULL */
291 if (a == NULL)
292 return NULL;
293 if (b == NULL)
294 return bms_copy(a);
295 /* Copy the left input */
296 result = bms_copy(a);
297 /* And remove b's bits from result */
298 shortlen = Min(a->nwords, b->nwords);
299 for (i = 0; i < shortlen; i++)
300 result->words[i] &= ~b->words[i];
301 return result;
305 * bms_is_subset - is A a subset of B?
307 bool
308 bms_is_subset(const Bitmapset *a, const Bitmapset *b)
310 int shortlen;
311 int longlen;
312 int i;
314 /* Handle cases where either input is NULL */
315 if (a == NULL)
316 return true; /* empty set is a subset of anything */
317 if (b == NULL)
318 return bms_is_empty(a);
319 /* Check common words */
320 shortlen = Min(a->nwords, b->nwords);
321 for (i = 0; i < shortlen; i++)
323 if ((a->words[i] & ~b->words[i]) != 0)
324 return false;
326 /* Check extra words */
327 if (a->nwords > b->nwords)
329 longlen = a->nwords;
330 for (; i < longlen; i++)
332 if (a->words[i] != 0)
333 return false;
336 return true;
340 * bms_is_member - is X a member of A?
342 bool
343 bms_is_member(int x, const Bitmapset *a)
345 int wordnum,
346 bitnum;
348 /* XXX better to just return false for x<0 ? */
349 if (x < 0)
350 elog(ERROR, "negative bitmapset member not allowed");
351 if (a == NULL)
352 return false;
353 wordnum = WORDNUM(x);
354 bitnum = BITNUM(x);
355 if (wordnum >= a->nwords)
356 return false;
357 if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
358 return true;
359 return false;
363 * bms_overlap - do sets overlap (ie, have a nonempty intersection)?
365 bool
366 bms_overlap(const Bitmapset *a, const Bitmapset *b)
368 int shortlen;
369 int i;
371 /* Handle cases where either input is NULL */
372 if (a == NULL || b == NULL)
373 return false;
374 /* Check words in common */
375 shortlen = Min(a->nwords, b->nwords);
376 for (i = 0; i < shortlen; i++)
378 if ((a->words[i] & b->words[i]) != 0)
379 return true;
381 return false;
385 * bms_nonempty_difference - do sets have a nonempty difference?
387 bool
388 bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
390 int shortlen;
391 int i;
393 /* Handle cases where either input is NULL */
394 if (a == NULL)
395 return false;
396 if (b == NULL)
397 return !bms_is_empty(a);
398 /* Check words in common */
399 shortlen = Min(a->nwords, b->nwords);
400 for (i = 0; i < shortlen; i++)
402 if ((a->words[i] & ~b->words[i]) != 0)
403 return true;
405 /* Check extra words in a */
406 for (; i < a->nwords; i++)
408 if (a->words[i] != 0)
409 return true;
411 return false;
415 * bms_singleton_member - return the sole integer member of set
417 * Raises error if |a| is not 1.
420 bms_singleton_member(const Bitmapset *a)
422 int result = -1;
423 int nwords;
424 int wordnum;
426 if (a == NULL)
427 elog(ERROR, "bitmapset is empty");
428 nwords = a->nwords;
429 for (wordnum = 0; wordnum < nwords; wordnum++)
431 bitmapword w = a->words[wordnum];
433 if (w != 0)
435 if (result >= 0 || HAS_MULTIPLE_ONES(w))
436 elog(ERROR, "bitmapset has multiple members");
437 result = wordnum * BITS_PER_BITMAPWORD;
438 while ((w & 255) == 0)
440 w >>= 8;
441 result += 8;
443 result += rightmost_one_pos[w & 255];
446 if (result < 0)
447 elog(ERROR, "bitmapset is empty");
448 return result;
452 * bms_num_members - count members of set
455 bms_num_members(const Bitmapset *a)
457 int result = 0;
458 int nwords;
459 int wordnum;
461 if (a == NULL)
462 return 0;
463 nwords = a->nwords;
464 for (wordnum = 0; wordnum < nwords; wordnum++)
466 bitmapword w = a->words[wordnum];
468 /* we assume here that bitmapword is an unsigned type */
469 while (w != 0)
471 result += number_of_ones[w & 255];
472 w >>= 8;
475 return result;
479 * bms_membership - does a set have zero, one, or multiple members?
481 * This is faster than making an exact count with bms_num_members().
483 BMS_Membership
484 bms_membership(const Bitmapset *a)
486 BMS_Membership result = BMS_EMPTY_SET;
487 int nwords;
488 int wordnum;
490 if (a == NULL)
491 return BMS_EMPTY_SET;
492 nwords = a->nwords;
493 for (wordnum = 0; wordnum < nwords; wordnum++)
495 bitmapword w = a->words[wordnum];
497 if (w != 0)
499 if (result != BMS_EMPTY_SET || HAS_MULTIPLE_ONES(w))
500 return BMS_MULTIPLE;
501 result = BMS_SINGLETON;
504 return result;
508 * bms_is_empty - is a set empty?
510 * This is even faster than bms_membership().
512 bool
513 bms_is_empty(const Bitmapset *a)
515 int nwords;
516 int wordnum;
518 if (a == NULL)
519 return true;
520 nwords = a->nwords;
521 for (wordnum = 0; wordnum < nwords; wordnum++)
523 bitmapword w = a->words[wordnum];
525 if (w != 0)
526 return false;
528 return true;
533 * These operations all "recycle" their non-const inputs, ie, either
534 * return the modified input or pfree it if it can't hold the result.
536 * These should generally be used in the style
538 * foo = bms_add_member(foo, x);
543 * bms_add_member - add a specified member to set
545 * Input set is modified or recycled!
547 Bitmapset *
548 bms_add_member(Bitmapset *a, int x)
550 int wordnum,
551 bitnum;
553 if (x < 0)
554 elog(ERROR, "negative bitmapset member not allowed");
555 if (a == NULL)
556 return bms_make_singleton(x);
557 wordnum = WORDNUM(x);
558 bitnum = BITNUM(x);
559 if (wordnum >= a->nwords)
561 /* Slow path: make a larger set and union the input set into it */
562 Bitmapset *result;
563 int nwords;
564 int i;
566 result = bms_make_singleton(x);
567 nwords = a->nwords;
568 for (i = 0; i < nwords; i++)
569 result->words[i] |= a->words[i];
570 pfree(a);
571 return result;
573 /* Fast path: x fits in existing set */
574 a->words[wordnum] |= ((bitmapword) 1 << bitnum);
575 return a;
579 * bms_del_member - remove a specified member from set
581 * No error if x is not currently a member of set
583 * Input set is modified in-place!
585 Bitmapset *
586 bms_del_member(Bitmapset *a, int x)
588 int wordnum,
589 bitnum;
591 if (x < 0)
592 elog(ERROR, "negative bitmapset member not allowed");
593 if (a == NULL)
594 return NULL;
595 wordnum = WORDNUM(x);
596 bitnum = BITNUM(x);
597 if (wordnum < a->nwords)
598 a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
599 return a;
603 * bms_add_members - like bms_union, but left input is recycled
605 Bitmapset *
606 bms_add_members(Bitmapset *a, const Bitmapset *b)
608 Bitmapset *result;
609 const Bitmapset *other;
610 int otherlen;
611 int i;
613 /* Handle cases where either input is NULL */
614 if (a == NULL)
615 return bms_copy(b);
616 if (b == NULL)
617 return a;
618 /* Identify shorter and longer input; copy the longer one if needed */
619 if (a->nwords < b->nwords)
621 result = bms_copy(b);
622 other = a;
624 else
626 result = a;
627 other = b;
629 /* And union the shorter input into the result */
630 otherlen = other->nwords;
631 for (i = 0; i < otherlen; i++)
632 result->words[i] |= other->words[i];
633 if (result != a)
634 pfree(a);
635 return result;
639 * bms_int_members - like bms_intersect, but left input is recycled
641 Bitmapset *
642 bms_int_members(Bitmapset *a, const Bitmapset *b)
644 int shortlen;
645 int i;
647 /* Handle cases where either input is NULL */
648 if (a == NULL)
649 return NULL;
650 if (b == NULL)
652 pfree(a);
653 return NULL;
655 /* Intersect b into a; we need never copy */
656 shortlen = Min(a->nwords, b->nwords);
657 for (i = 0; i < shortlen; i++)
658 a->words[i] &= b->words[i];
659 for (; i < a->nwords; i++)
660 a->words[i] = 0;
661 return a;
665 * bms_del_members - like bms_difference, but left input is recycled
667 Bitmapset *
668 bms_del_members(Bitmapset *a, const Bitmapset *b)
670 int shortlen;
671 int i;
673 /* Handle cases where either input is NULL */
674 if (a == NULL)
675 return NULL;
676 if (b == NULL)
677 return a;
678 /* Remove b's bits from a; we need never copy */
679 shortlen = Min(a->nwords, b->nwords);
680 for (i = 0; i < shortlen; i++)
681 a->words[i] &= ~b->words[i];
682 return a;
686 * bms_join - like bms_union, but *both* inputs are recycled
688 Bitmapset *
689 bms_join(Bitmapset *a, Bitmapset *b)
691 Bitmapset *result;
692 Bitmapset *other;
693 int otherlen;
694 int i;
696 /* Handle cases where either input is NULL */
697 if (a == NULL)
698 return b;
699 if (b == NULL)
700 return a;
701 /* Identify shorter and longer input; use longer one as result */
702 if (a->nwords < b->nwords)
704 result = b;
705 other = a;
707 else
709 result = a;
710 other = b;
712 /* And union the shorter input into the result */
713 otherlen = other->nwords;
714 for (i = 0; i < otherlen; i++)
715 result->words[i] |= other->words[i];
716 if (other != result) /* pure paranoia */
717 pfree(other);
718 return result;
721 /*----------
722 * bms_first_member - find and remove first member of a set
724 * Returns -1 if set is empty. NB: set is destructively modified!
726 * This is intended as support for iterating through the members of a set.
727 * The typical pattern is
729 * tmpset = bms_copy(inputset);
730 * while ((x = bms_first_member(tmpset)) >= 0)
731 * process member x;
732 * bms_free(tmpset);
733 *----------
736 bms_first_member(Bitmapset *a)
738 int nwords;
739 int wordnum;
741 if (a == NULL)
742 return -1;
743 nwords = a->nwords;
744 for (wordnum = 0; wordnum < nwords; wordnum++)
746 bitmapword w = a->words[wordnum];
748 if (w != 0)
750 int result;
752 w = RIGHTMOST_ONE(w);
753 a->words[wordnum] &= ~w;
755 result = wordnum * BITS_PER_BITMAPWORD;
756 while ((w & 255) == 0)
758 w >>= 8;
759 result += 8;
761 result += rightmost_one_pos[w & 255];
762 return result;
765 return -1;
769 * bms_hash_value - compute a hash key for a Bitmapset
771 * Note: we must ensure that any two bitmapsets that are bms_equal() will
772 * hash to the same value; in practice this means that trailing all-zero
773 * words must not affect the result. Hence we strip those before applying
774 * hash_any().
776 uint32
777 bms_hash_value(const Bitmapset *a)
779 int lastword;
781 if (a == NULL)
782 return 0; /* All empty sets hash to 0 */
783 for (lastword = a->nwords; --lastword >= 0;)
785 if (a->words[lastword] != 0)
786 break;
788 if (lastword < 0)
789 return 0; /* All empty sets hash to 0 */
790 return DatumGetUInt32(hash_any((const unsigned char *) a->words,
791 (lastword + 1) * sizeof(bitmapword)));