1 /*--------------------------------------------------------------------------
4 * Test integer set data structure.
6 * Copyright (c) 2019-2024, PostgreSQL Global Development Group
9 * src/test/modules/test_integerset/test_integerset.c
11 * -------------------------------------------------------------------------
15 #include "common/pg_prng.h"
17 #include "lib/integerset.h"
18 #include "miscadmin.h"
19 #include "nodes/bitmapset.h"
20 #include "storage/block.h"
21 #include "storage/itemptr.h"
22 #include "utils/memutils.h"
23 #include "utils/timestamp.h"
26 * If you enable this, the "pattern" tests will print information about
27 * how long populating, probing, and iterating the test set takes, and
28 * how much memory the test set consumed. That can be used as
29 * micro-benchmark of various operations and input patterns (you might
30 * want to increase the number of values used in each of the test, if
31 * you do that, to reduce noise).
33 * The information is printed to the server's stderr, mostly because
34 * that's where MemoryContextStats() output goes.
36 static const bool intset_test_stats
= false;
40 PG_FUNCTION_INFO_V1(test_integerset
);
43 * A struct to define a pattern of integers, for use with the test_pattern()
48 char *test_name
; /* short name of the test, for humans */
49 char *pattern_str
; /* a bit pattern */
50 uint64 spacing
; /* pattern repeats at this interval */
51 uint64 num_values
; /* number of integers to set in total */
54 static const test_spec test_specs
[] = {
56 "all ones", "1111111111",
60 "alternating bits", "0101010101",
64 "clusters of ten", "1111111111",
68 "clusters of hundred",
69 "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
77 "sparse", "100000000000000000000000000000001",
81 "single values, distance > 2^32", "1",
82 UINT64CONST(10000000000), 1000000
85 "clusters, distance > 2^32", "10101010",
86 UINT64CONST(10000000000), 10000000
89 "clusters, distance > 2^60", "10101010",
90 UINT64CONST(2000000000000000000),
91 23 /* can't be much higher than this, or we
96 static void test_pattern(const test_spec
*spec
);
97 static void test_empty(void);
98 static void test_single_value(uint64 value
);
99 static void check_with_filler(IntegerSet
*intset
, uint64 x
, uint64 value
, uint64 filler_min
, uint64 filler_max
);
100 static void test_single_value_and_filler(uint64 value
, uint64 filler_min
, uint64 filler_max
);
101 static void test_huge_distances(void);
104 * SQL-callable entry point to perform all tests.
107 test_integerset(PG_FUNCTION_ARGS
)
109 /* Tests for various corner cases */
111 test_huge_distances();
112 test_single_value(0);
113 test_single_value(1);
114 test_single_value(PG_UINT64_MAX
- 1);
115 test_single_value(PG_UINT64_MAX
);
116 test_single_value_and_filler(0, 1000, 2000);
117 test_single_value_and_filler(1, 1000, 2000);
118 test_single_value_and_filler(1, 1000, 2000000);
119 test_single_value_and_filler(PG_UINT64_MAX
- 1, 1000, 2000);
120 test_single_value_and_filler(PG_UINT64_MAX
, 1000, 2000);
122 /* Test different test patterns, with lots of entries */
123 for (int i
= 0; i
< lengthof(test_specs
); i
++)
125 test_pattern(&test_specs
[i
]);
132 * Test with a repeating pattern, defined by the 'spec'.
135 test_pattern(const test_spec
*spec
)
138 MemoryContext intset_ctx
;
139 MemoryContext old_ctx
;
140 TimestampTz starttime
;
145 uint64
*pattern_values
;
146 uint64 pattern_num_values
;
148 elog(NOTICE
, "testing intset with pattern \"%s\"", spec
->test_name
);
149 if (intset_test_stats
)
150 fprintf(stderr
, "-----\ntesting intset with pattern \"%s\"\n", spec
->test_name
);
152 /* Pre-process the pattern, creating an array of integers from it. */
153 patternlen
= strlen(spec
->pattern_str
);
154 pattern_values
= palloc(patternlen
* sizeof(uint64
));
155 pattern_num_values
= 0;
156 for (int i
= 0; i
< patternlen
; i
++)
158 if (spec
->pattern_str
[i
] == '1')
159 pattern_values
[pattern_num_values
++] = i
;
163 * Allocate the integer set.
165 * Allocate it in a separate memory context, so that we can print its
166 * memory usage easily. (intset_create() creates a memory context of its
167 * own, too, but we don't have direct access to it, so we cannot call
168 * MemoryContextStats() on it directly).
170 intset_ctx
= AllocSetContextCreate(CurrentMemoryContext
,
172 ALLOCSET_SMALL_SIZES
);
173 MemoryContextSetIdentifier(intset_ctx
, spec
->test_name
);
174 old_ctx
= MemoryContextSwitchTo(intset_ctx
);
175 intset
= intset_create();
176 MemoryContextSwitchTo(old_ctx
);
179 * Add values to the set.
181 starttime
= GetCurrentTimestamp();
185 while (n
< spec
->num_values
)
189 for (int i
= 0; i
< pattern_num_values
&& n
< spec
->num_values
; i
++)
191 x
= last_int
+ pattern_values
[i
];
193 intset_add_member(intset
, x
);
196 last_int
+= spec
->spacing
;
199 endtime
= GetCurrentTimestamp();
201 if (intset_test_stats
)
202 fprintf(stderr
, "added " UINT64_FORMAT
" values in %d ms\n",
203 spec
->num_values
, (int) (endtime
- starttime
) / 1000);
206 * Print stats on the amount of memory used.
208 * We print the usage reported by intset_memory_usage(), as well as the
209 * stats from the memory context. They should be in the same ballpark,
210 * but it's hard to automate testing that, so if you're making changes to
211 * the implementation, just observe that manually.
213 if (intset_test_stats
)
218 * Also print memory usage as reported by intset_memory_usage(). It
219 * should be in the same ballpark as the usage reported by
220 * MemoryContextStats().
222 mem_usage
= intset_memory_usage(intset
);
223 fprintf(stderr
, "intset_memory_usage() reported " UINT64_FORMAT
" (%0.2f bytes / integer)\n",
224 mem_usage
, (double) mem_usage
/ spec
->num_values
);
226 MemoryContextStats(intset_ctx
);
229 /* Check that intset_get_num_entries works */
230 n
= intset_num_entries(intset
);
231 if (n
!= spec
->num_values
)
232 elog(ERROR
, "intset_num_entries returned " UINT64_FORMAT
", expected " UINT64_FORMAT
, n
, spec
->num_values
);
235 * Test random-access probes with intset_is_member()
237 starttime
= GetCurrentTimestamp();
239 for (n
= 0; n
< 100000; n
++)
246 * Pick next value to probe at random. We limit the probes to the
247 * last integer that we added to the set, plus an arbitrary constant
248 * (1000). There's no point in probing the whole 0 - 2^64 range, if
249 * only a small part of the integer space is used. We would very
250 * rarely hit values that are actually in the set.
252 x
= pg_prng_uint64_range(&pg_global_prng_state
, 0, last_int
+ 1000);
254 /* Do we expect this value to be present in the set? */
259 uint64 idx
= x
% spec
->spacing
;
261 if (idx
>= patternlen
)
263 else if (spec
->pattern_str
[idx
] == '1')
269 /* Is it present according to intset_is_member() ? */
270 b
= intset_is_member(intset
, x
);
273 elog(ERROR
, "mismatch at " UINT64_FORMAT
": %d vs %d", x
, b
, expected
);
275 endtime
= GetCurrentTimestamp();
276 if (intset_test_stats
)
277 fprintf(stderr
, "probed " UINT64_FORMAT
" values in %d ms\n",
278 n
, (int) (endtime
- starttime
) / 1000);
283 starttime
= GetCurrentTimestamp();
285 intset_begin_iterate(intset
);
288 while (n
< spec
->num_values
)
290 for (int i
= 0; i
< pattern_num_values
&& n
< spec
->num_values
; i
++)
292 uint64 expected
= last_int
+ pattern_values
[i
];
295 if (!intset_iterate_next(intset
, &x
))
299 elog(ERROR
, "iterate returned wrong value; got " UINT64_FORMAT
", expected " UINT64_FORMAT
, x
, expected
);
302 last_int
+= spec
->spacing
;
304 endtime
= GetCurrentTimestamp();
305 if (intset_test_stats
)
306 fprintf(stderr
, "iterated " UINT64_FORMAT
" values in %d ms\n",
307 n
, (int) (endtime
- starttime
) / 1000);
309 if (n
< spec
->num_values
)
310 elog(ERROR
, "iterator stopped short after " UINT64_FORMAT
" entries, expected " UINT64_FORMAT
, n
, spec
->num_values
);
311 if (n
> spec
->num_values
)
312 elog(ERROR
, "iterator returned " UINT64_FORMAT
" entries, " UINT64_FORMAT
" was expected", n
, spec
->num_values
);
314 MemoryContextDelete(intset_ctx
);
318 * Test with a set containing a single integer.
321 test_single_value(uint64 value
)
328 elog(NOTICE
, "testing intset with single value " UINT64_FORMAT
, value
);
330 /* Create the set. */
331 intset
= intset_create();
332 intset_add_member(intset
, value
);
334 /* Test intset_get_num_entries() */
335 num_entries
= intset_num_entries(intset
);
336 if (num_entries
!= 1)
337 elog(ERROR
, "intset_num_entries returned " UINT64_FORMAT
", expected 1", num_entries
);
340 * Test intset_is_member() at various special values, like 0 and maximum
341 * possible 64-bit integer, as well as the value itself.
343 if (intset_is_member(intset
, 0) != (value
== 0))
344 elog(ERROR
, "intset_is_member failed for 0");
345 if (intset_is_member(intset
, 1) != (value
== 1))
346 elog(ERROR
, "intset_is_member failed for 1");
347 if (intset_is_member(intset
, PG_UINT64_MAX
) != (value
== PG_UINT64_MAX
))
348 elog(ERROR
, "intset_is_member failed for PG_UINT64_MAX");
349 if (intset_is_member(intset
, value
) != true)
350 elog(ERROR
, "intset_is_member failed for the tested value");
355 intset_begin_iterate(intset
);
356 found
= intset_iterate_next(intset
, &x
);
357 if (!found
|| x
!= value
)
358 elog(ERROR
, "intset_iterate_next failed for " UINT64_FORMAT
, x
);
360 found
= intset_iterate_next(intset
, &x
);
362 elog(ERROR
, "intset_iterate_next failed " UINT64_FORMAT
, x
);
366 * Test with an integer set that contains:
368 * - a given single 'value', and
369 * - all integers between 'filler_min' and 'filler_max'.
371 * This exercises different codepaths than testing just with a single value,
372 * because the implementation buffers newly-added values. If we add just a
373 * single value to the set, we won't test the internal B-tree code at all,
374 * just the code that deals with the buffer.
377 test_single_value_and_filler(uint64 value
, uint64 filler_min
, uint64 filler_max
)
382 uint64
*iter_expected
;
384 uint64 num_entries
= 0;
387 elog(NOTICE
, "testing intset with value " UINT64_FORMAT
", and all between " UINT64_FORMAT
" and " UINT64_FORMAT
,
388 value
, filler_min
, filler_max
);
390 intset
= intset_create();
392 iter_expected
= palloc(sizeof(uint64
) * (filler_max
- filler_min
+ 1));
393 if (value
< filler_min
)
395 intset_add_member(intset
, value
);
396 iter_expected
[n
++] = value
;
399 for (x
= filler_min
; x
< filler_max
; x
++)
401 intset_add_member(intset
, x
);
402 iter_expected
[n
++] = x
;
405 if (value
>= filler_max
)
407 intset_add_member(intset
, value
);
408 iter_expected
[n
++] = value
;
411 /* Test intset_get_num_entries() */
412 num_entries
= intset_num_entries(intset
);
413 if (num_entries
!= n
)
414 elog(ERROR
, "intset_num_entries returned " UINT64_FORMAT
", expected " UINT64_FORMAT
, num_entries
, n
);
417 * Test intset_is_member() at various spots, at and around the values that
418 * we expect to be set, as well as 0 and the maximum possible value.
420 check_with_filler(intset
, 0,
421 value
, filler_min
, filler_max
);
422 check_with_filler(intset
, 1,
423 value
, filler_min
, filler_max
);
424 check_with_filler(intset
, filler_min
- 1,
425 value
, filler_min
, filler_max
);
426 check_with_filler(intset
, filler_min
,
427 value
, filler_min
, filler_max
);
428 check_with_filler(intset
, filler_min
+ 1,
429 value
, filler_min
, filler_max
);
430 check_with_filler(intset
, value
- 1,
431 value
, filler_min
, filler_max
);
432 check_with_filler(intset
, value
,
433 value
, filler_min
, filler_max
);
434 check_with_filler(intset
, value
+ 1,
435 value
, filler_min
, filler_max
);
436 check_with_filler(intset
, filler_max
- 1,
437 value
, filler_min
, filler_max
);
438 check_with_filler(intset
, filler_max
,
439 value
, filler_min
, filler_max
);
440 check_with_filler(intset
, filler_max
+ 1,
441 value
, filler_min
, filler_max
);
442 check_with_filler(intset
, PG_UINT64_MAX
- 1,
443 value
, filler_min
, filler_max
);
444 check_with_filler(intset
, PG_UINT64_MAX
,
445 value
, filler_min
, filler_max
);
447 intset_begin_iterate(intset
);
448 for (uint64 i
= 0; i
< n
; i
++)
450 found
= intset_iterate_next(intset
, &x
);
451 if (!found
|| x
!= iter_expected
[i
])
452 elog(ERROR
, "intset_iterate_next failed for " UINT64_FORMAT
, x
);
454 found
= intset_iterate_next(intset
, &x
);
456 elog(ERROR
, "intset_iterate_next failed " UINT64_FORMAT
, x
);
458 mem_usage
= intset_memory_usage(intset
);
459 if (mem_usage
< 5000 || mem_usage
> 500000000)
460 elog(ERROR
, "intset_memory_usage() reported suspicious value: " UINT64_FORMAT
, mem_usage
);
464 * Helper function for test_single_value_and_filler.
466 * Calls intset_is_member() for value 'x', and checks that the result is what
470 check_with_filler(IntegerSet
*intset
, uint64 x
,
471 uint64 value
, uint64 filler_min
, uint64 filler_max
)
476 expected
= (x
== value
|| (filler_min
<= x
&& x
< filler_max
));
478 actual
= intset_is_member(intset
, x
);
480 if (actual
!= expected
)
481 elog(ERROR
, "intset_is_member failed for " UINT64_FORMAT
, x
);
493 elog(NOTICE
, "testing intset with empty set");
495 intset
= intset_create();
497 /* Test intset_is_member() */
498 if (intset_is_member(intset
, 0) != false)
499 elog(ERROR
, "intset_is_member on empty set returned true");
500 if (intset_is_member(intset
, 1) != false)
501 elog(ERROR
, "intset_is_member on empty set returned true");
502 if (intset_is_member(intset
, PG_UINT64_MAX
) != false)
503 elog(ERROR
, "intset_is_member on empty set returned true");
506 intset_begin_iterate(intset
);
507 if (intset_iterate_next(intset
, &x
))
508 elog(ERROR
, "intset_iterate_next on empty set returned a value (" UINT64_FORMAT
")", x
);
512 * Test with integers that are more than 2^60 apart.
514 * The Simple-8b encoding used by the set implementation can only encode
515 * values up to 2^60. That makes large differences like this interesting
519 test_huge_distances(void)
528 elog(NOTICE
, "testing intset with distances > 2^60 between values");
531 values
[num_values
++] = val
;
533 /* Test differences on both sides of the 2^60 boundary. */
534 val
+= UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
535 values
[num_values
++] = val
;
537 val
+= UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
538 values
[num_values
++] = val
;
540 val
+= UINT64CONST(1152921504606846976); /* 2^60 */
541 values
[num_values
++] = val
;
543 val
+= UINT64CONST(1152921504606846976); /* 2^60 */
544 values
[num_values
++] = val
;
546 val
+= UINT64CONST(1152921504606846976); /* 2^60 */
547 values
[num_values
++] = val
;
549 val
+= UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
550 values
[num_values
++] = val
;
552 val
+= UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
553 values
[num_values
++] = val
;
555 val
+= UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
556 values
[num_values
++] = val
;
558 val
+= UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
559 values
[num_values
++] = val
;
561 val
+= UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
562 values
[num_values
++] = val
;
564 val
+= UINT64CONST(1152921504606846976); /* 2^60 */
565 values
[num_values
++] = val
;
568 * We're now very close to 2^64, so can't add large values anymore. But
569 * add more smaller values to the end, to make sure that all the above
570 * values get flushed and packed into the tree structure.
572 while (num_values
< 1000)
574 val
+= pg_prng_uint32(&pg_global_prng_state
);
575 values
[num_values
++] = val
;
578 /* Create an IntegerSet using these values */
579 intset
= intset_create();
580 for (int i
= 0; i
< num_values
; i
++)
581 intset_add_member(intset
, values
[i
]);
584 * Test intset_is_member() around each of these values
586 for (int i
= 0; i
< num_values
; i
++)
588 uint64 y
= values
[i
];
594 expected
= (values
[i
- 1] == y
- 1);
595 result
= intset_is_member(intset
, y
- 1);
596 if (result
!= expected
)
597 elog(ERROR
, "intset_is_member failed for " UINT64_FORMAT
, y
- 1);
600 result
= intset_is_member(intset
, y
);
602 elog(ERROR
, "intset_is_member failed for " UINT64_FORMAT
, y
);
604 expected
= (i
!= num_values
- 1) ? (values
[i
+ 1] == y
+ 1) : false;
605 result
= intset_is_member(intset
, y
+ 1);
606 if (result
!= expected
)
607 elog(ERROR
, "intset_is_member failed for " UINT64_FORMAT
, y
+ 1);
613 intset_begin_iterate(intset
);
614 for (int i
= 0; i
< num_values
; i
++)
616 found
= intset_iterate_next(intset
, &x
);
617 if (!found
|| x
!= values
[i
])
618 elog(ERROR
, "intset_iterate_next failed for " UINT64_FORMAT
, x
);
620 found
= intset_iterate_next(intset
, &x
);
622 elog(ERROR
, "intset_iterate_next failed " UINT64_FORMAT
, x
);