1 /*--------------------------------------------------------------------------
4 * Test integer set data structure.
6 * Copyright (c) 2019-2024, PostgreSQL Global Development Group
9 * src/test/modules/test_integerset/test_integerset.c
11 * -------------------------------------------------------------------------
15 #include "common/pg_prng.h"
17 #include "lib/integerset.h"
18 #include "utils/memutils.h"
19 #include "utils/timestamp.h"
22 * If you enable this, the "pattern" tests will print information about
23 * how long populating, probing, and iterating the test set takes, and
24 * how much memory the test set consumed. That can be used as
25 * micro-benchmark of various operations and input patterns (you might
26 * want to increase the number of values used in each of the test, if
27 * you do that, to reduce noise).
29 * The information is printed to the server's stderr, mostly because
30 * that's where MemoryContextStats() output goes.
32 static const bool intset_test_stats
= false;
36 PG_FUNCTION_INFO_V1(test_integerset
);
39 * A struct to define a pattern of integers, for use with the test_pattern()
44 char *test_name
; /* short name of the test, for humans */
45 char *pattern_str
; /* a bit pattern */
46 uint64 spacing
; /* pattern repeats at this interval */
47 uint64 num_values
; /* number of integers to set in total */
50 static const test_spec test_specs
[] = {
52 "all ones", "1111111111",
56 "alternating bits", "0101010101",
60 "clusters of ten", "1111111111",
64 "clusters of hundred",
65 "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
73 "sparse", "100000000000000000000000000000001",
77 "single values, distance > 2^32", "1",
78 UINT64CONST(10000000000), 1000000
81 "clusters, distance > 2^32", "10101010",
82 UINT64CONST(10000000000), 10000000
85 "clusters, distance > 2^60", "10101010",
86 UINT64CONST(2000000000000000000),
87 23 /* can't be much higher than this, or we
92 static void test_pattern(const test_spec
*spec
);
93 static void test_empty(void);
94 static void test_single_value(uint64 value
);
95 static void check_with_filler(IntegerSet
*intset
, uint64 x
, uint64 value
, uint64 filler_min
, uint64 filler_max
);
96 static void test_single_value_and_filler(uint64 value
, uint64 filler_min
, uint64 filler_max
);
97 static void test_huge_distances(void);
100 * SQL-callable entry point to perform all tests.
103 test_integerset(PG_FUNCTION_ARGS
)
105 /* Tests for various corner cases */
107 test_huge_distances();
108 test_single_value(0);
109 test_single_value(1);
110 test_single_value(PG_UINT64_MAX
- 1);
111 test_single_value(PG_UINT64_MAX
);
112 test_single_value_and_filler(0, 1000, 2000);
113 test_single_value_and_filler(1, 1000, 2000);
114 test_single_value_and_filler(1, 1000, 2000000);
115 test_single_value_and_filler(PG_UINT64_MAX
- 1, 1000, 2000);
116 test_single_value_and_filler(PG_UINT64_MAX
, 1000, 2000);
118 /* Test different test patterns, with lots of entries */
119 for (int i
= 0; i
< lengthof(test_specs
); i
++)
121 test_pattern(&test_specs
[i
]);
128 * Test with a repeating pattern, defined by the 'spec'.
131 test_pattern(const test_spec
*spec
)
134 MemoryContext intset_ctx
;
135 MemoryContext old_ctx
;
136 TimestampTz starttime
;
141 uint64
*pattern_values
;
142 uint64 pattern_num_values
;
144 elog(NOTICE
, "testing intset with pattern \"%s\"", spec
->test_name
);
145 if (intset_test_stats
)
146 fprintf(stderr
, "-----\ntesting intset with pattern \"%s\"\n", spec
->test_name
);
148 /* Pre-process the pattern, creating an array of integers from it. */
149 patternlen
= strlen(spec
->pattern_str
);
150 pattern_values
= palloc(patternlen
* sizeof(uint64
));
151 pattern_num_values
= 0;
152 for (int i
= 0; i
< patternlen
; i
++)
154 if (spec
->pattern_str
[i
] == '1')
155 pattern_values
[pattern_num_values
++] = i
;
159 * Allocate the integer set.
161 * Allocate it in a separate memory context, so that we can print its
162 * memory usage easily. (intset_create() creates a memory context of its
163 * own, too, but we don't have direct access to it, so we cannot call
164 * MemoryContextStats() on it directly).
166 intset_ctx
= AllocSetContextCreate(CurrentMemoryContext
,
168 ALLOCSET_SMALL_SIZES
);
169 MemoryContextSetIdentifier(intset_ctx
, spec
->test_name
);
170 old_ctx
= MemoryContextSwitchTo(intset_ctx
);
171 intset
= intset_create();
172 MemoryContextSwitchTo(old_ctx
);
175 * Add values to the set.
177 starttime
= GetCurrentTimestamp();
181 while (n
< spec
->num_values
)
185 for (int i
= 0; i
< pattern_num_values
&& n
< spec
->num_values
; i
++)
187 x
= last_int
+ pattern_values
[i
];
189 intset_add_member(intset
, x
);
192 last_int
+= spec
->spacing
;
195 endtime
= GetCurrentTimestamp();
197 if (intset_test_stats
)
198 fprintf(stderr
, "added " UINT64_FORMAT
" values in %d ms\n",
199 spec
->num_values
, (int) (endtime
- starttime
) / 1000);
202 * Print stats on the amount of memory used.
204 * We print the usage reported by intset_memory_usage(), as well as the
205 * stats from the memory context. They should be in the same ballpark,
206 * but it's hard to automate testing that, so if you're making changes to
207 * the implementation, just observe that manually.
209 if (intset_test_stats
)
214 * Also print memory usage as reported by intset_memory_usage(). It
215 * should be in the same ballpark as the usage reported by
216 * MemoryContextStats().
218 mem_usage
= intset_memory_usage(intset
);
219 fprintf(stderr
, "intset_memory_usage() reported " UINT64_FORMAT
" (%0.2f bytes / integer)\n",
220 mem_usage
, (double) mem_usage
/ spec
->num_values
);
222 MemoryContextStats(intset_ctx
);
225 /* Check that intset_get_num_entries works */
226 n
= intset_num_entries(intset
);
227 if (n
!= spec
->num_values
)
228 elog(ERROR
, "intset_num_entries returned " UINT64_FORMAT
", expected " UINT64_FORMAT
, n
, spec
->num_values
);
231 * Test random-access probes with intset_is_member()
233 starttime
= GetCurrentTimestamp();
235 for (n
= 0; n
< 100000; n
++)
242 * Pick next value to probe at random. We limit the probes to the
243 * last integer that we added to the set, plus an arbitrary constant
244 * (1000). There's no point in probing the whole 0 - 2^64 range, if
245 * only a small part of the integer space is used. We would very
246 * rarely hit values that are actually in the set.
248 x
= pg_prng_uint64_range(&pg_global_prng_state
, 0, last_int
+ 1000);
250 /* Do we expect this value to be present in the set? */
255 uint64 idx
= x
% spec
->spacing
;
257 if (idx
>= patternlen
)
259 else if (spec
->pattern_str
[idx
] == '1')
265 /* Is it present according to intset_is_member() ? */
266 b
= intset_is_member(intset
, x
);
269 elog(ERROR
, "mismatch at " UINT64_FORMAT
": %d vs %d", x
, b
, expected
);
271 endtime
= GetCurrentTimestamp();
272 if (intset_test_stats
)
273 fprintf(stderr
, "probed " UINT64_FORMAT
" values in %d ms\n",
274 n
, (int) (endtime
- starttime
) / 1000);
279 starttime
= GetCurrentTimestamp();
281 intset_begin_iterate(intset
);
284 while (n
< spec
->num_values
)
286 for (int i
= 0; i
< pattern_num_values
&& n
< spec
->num_values
; i
++)
288 uint64 expected
= last_int
+ pattern_values
[i
];
291 if (!intset_iterate_next(intset
, &x
))
295 elog(ERROR
, "iterate returned wrong value; got " UINT64_FORMAT
", expected " UINT64_FORMAT
, x
, expected
);
298 last_int
+= spec
->spacing
;
300 endtime
= GetCurrentTimestamp();
301 if (intset_test_stats
)
302 fprintf(stderr
, "iterated " UINT64_FORMAT
" values in %d ms\n",
303 n
, (int) (endtime
- starttime
) / 1000);
305 if (n
< spec
->num_values
)
306 elog(ERROR
, "iterator stopped short after " UINT64_FORMAT
" entries, expected " UINT64_FORMAT
, n
, spec
->num_values
);
307 if (n
> spec
->num_values
)
308 elog(ERROR
, "iterator returned " UINT64_FORMAT
" entries, " UINT64_FORMAT
" was expected", n
, spec
->num_values
);
310 MemoryContextDelete(intset_ctx
);
314 * Test with a set containing a single integer.
317 test_single_value(uint64 value
)
324 elog(NOTICE
, "testing intset with single value " UINT64_FORMAT
, value
);
326 /* Create the set. */
327 intset
= intset_create();
328 intset_add_member(intset
, value
);
330 /* Test intset_get_num_entries() */
331 num_entries
= intset_num_entries(intset
);
332 if (num_entries
!= 1)
333 elog(ERROR
, "intset_num_entries returned " UINT64_FORMAT
", expected 1", num_entries
);
336 * Test intset_is_member() at various special values, like 0 and maximum
337 * possible 64-bit integer, as well as the value itself.
339 if (intset_is_member(intset
, 0) != (value
== 0))
340 elog(ERROR
, "intset_is_member failed for 0");
341 if (intset_is_member(intset
, 1) != (value
== 1))
342 elog(ERROR
, "intset_is_member failed for 1");
343 if (intset_is_member(intset
, PG_UINT64_MAX
) != (value
== PG_UINT64_MAX
))
344 elog(ERROR
, "intset_is_member failed for PG_UINT64_MAX");
345 if (intset_is_member(intset
, value
) != true)
346 elog(ERROR
, "intset_is_member failed for the tested value");
351 intset_begin_iterate(intset
);
352 found
= intset_iterate_next(intset
, &x
);
353 if (!found
|| x
!= value
)
354 elog(ERROR
, "intset_iterate_next failed for " UINT64_FORMAT
, x
);
356 found
= intset_iterate_next(intset
, &x
);
358 elog(ERROR
, "intset_iterate_next failed " UINT64_FORMAT
, x
);
362 * Test with an integer set that contains:
364 * - a given single 'value', and
365 * - all integers between 'filler_min' and 'filler_max'.
367 * This exercises different codepaths than testing just with a single value,
368 * because the implementation buffers newly-added values. If we add just a
369 * single value to the set, we won't test the internal B-tree code at all,
370 * just the code that deals with the buffer.
373 test_single_value_and_filler(uint64 value
, uint64 filler_min
, uint64 filler_max
)
378 uint64
*iter_expected
;
380 uint64 num_entries
= 0;
383 elog(NOTICE
, "testing intset with value " UINT64_FORMAT
", and all between " UINT64_FORMAT
" and " UINT64_FORMAT
,
384 value
, filler_min
, filler_max
);
386 intset
= intset_create();
388 iter_expected
= palloc(sizeof(uint64
) * (filler_max
- filler_min
+ 1));
389 if (value
< filler_min
)
391 intset_add_member(intset
, value
);
392 iter_expected
[n
++] = value
;
395 for (x
= filler_min
; x
< filler_max
; x
++)
397 intset_add_member(intset
, x
);
398 iter_expected
[n
++] = x
;
401 if (value
>= filler_max
)
403 intset_add_member(intset
, value
);
404 iter_expected
[n
++] = value
;
407 /* Test intset_get_num_entries() */
408 num_entries
= intset_num_entries(intset
);
409 if (num_entries
!= n
)
410 elog(ERROR
, "intset_num_entries returned " UINT64_FORMAT
", expected " UINT64_FORMAT
, num_entries
, n
);
413 * Test intset_is_member() at various spots, at and around the values that
414 * we expect to be set, as well as 0 and the maximum possible value.
416 check_with_filler(intset
, 0,
417 value
, filler_min
, filler_max
);
418 check_with_filler(intset
, 1,
419 value
, filler_min
, filler_max
);
420 check_with_filler(intset
, filler_min
- 1,
421 value
, filler_min
, filler_max
);
422 check_with_filler(intset
, filler_min
,
423 value
, filler_min
, filler_max
);
424 check_with_filler(intset
, filler_min
+ 1,
425 value
, filler_min
, filler_max
);
426 check_with_filler(intset
, value
- 1,
427 value
, filler_min
, filler_max
);
428 check_with_filler(intset
, value
,
429 value
, filler_min
, filler_max
);
430 check_with_filler(intset
, value
+ 1,
431 value
, filler_min
, filler_max
);
432 check_with_filler(intset
, filler_max
- 1,
433 value
, filler_min
, filler_max
);
434 check_with_filler(intset
, filler_max
,
435 value
, filler_min
, filler_max
);
436 check_with_filler(intset
, filler_max
+ 1,
437 value
, filler_min
, filler_max
);
438 check_with_filler(intset
, PG_UINT64_MAX
- 1,
439 value
, filler_min
, filler_max
);
440 check_with_filler(intset
, PG_UINT64_MAX
,
441 value
, filler_min
, filler_max
);
443 intset_begin_iterate(intset
);
444 for (uint64 i
= 0; i
< n
; i
++)
446 found
= intset_iterate_next(intset
, &x
);
447 if (!found
|| x
!= iter_expected
[i
])
448 elog(ERROR
, "intset_iterate_next failed for " UINT64_FORMAT
, x
);
450 found
= intset_iterate_next(intset
, &x
);
452 elog(ERROR
, "intset_iterate_next failed " UINT64_FORMAT
, x
);
454 mem_usage
= intset_memory_usage(intset
);
455 if (mem_usage
< 5000 || mem_usage
> 500000000)
456 elog(ERROR
, "intset_memory_usage() reported suspicious value: " UINT64_FORMAT
, mem_usage
);
460 * Helper function for test_single_value_and_filler.
462 * Calls intset_is_member() for value 'x', and checks that the result is what
466 check_with_filler(IntegerSet
*intset
, uint64 x
,
467 uint64 value
, uint64 filler_min
, uint64 filler_max
)
472 expected
= (x
== value
|| (filler_min
<= x
&& x
< filler_max
));
474 actual
= intset_is_member(intset
, x
);
476 if (actual
!= expected
)
477 elog(ERROR
, "intset_is_member failed for " UINT64_FORMAT
, x
);
489 elog(NOTICE
, "testing intset with empty set");
491 intset
= intset_create();
493 /* Test intset_is_member() */
494 if (intset_is_member(intset
, 0) != false)
495 elog(ERROR
, "intset_is_member on empty set returned true");
496 if (intset_is_member(intset
, 1) != false)
497 elog(ERROR
, "intset_is_member on empty set returned true");
498 if (intset_is_member(intset
, PG_UINT64_MAX
) != false)
499 elog(ERROR
, "intset_is_member on empty set returned true");
502 intset_begin_iterate(intset
);
503 if (intset_iterate_next(intset
, &x
))
504 elog(ERROR
, "intset_iterate_next on empty set returned a value (" UINT64_FORMAT
")", x
);
508 * Test with integers that are more than 2^60 apart.
510 * The Simple-8b encoding used by the set implementation can only encode
511 * values up to 2^60. That makes large differences like this interesting
515 test_huge_distances(void)
524 elog(NOTICE
, "testing intset with distances > 2^60 between values");
527 values
[num_values
++] = val
;
529 /* Test differences on both sides of the 2^60 boundary. */
530 val
+= UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
531 values
[num_values
++] = val
;
533 val
+= UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
534 values
[num_values
++] = val
;
536 val
+= UINT64CONST(1152921504606846976); /* 2^60 */
537 values
[num_values
++] = val
;
539 val
+= UINT64CONST(1152921504606846976); /* 2^60 */
540 values
[num_values
++] = val
;
542 val
+= UINT64CONST(1152921504606846976); /* 2^60 */
543 values
[num_values
++] = val
;
545 val
+= UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
546 values
[num_values
++] = val
;
548 val
+= UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
549 values
[num_values
++] = val
;
551 val
+= UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
552 values
[num_values
++] = val
;
554 val
+= UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
555 values
[num_values
++] = val
;
557 val
+= UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
558 values
[num_values
++] = val
;
560 val
+= UINT64CONST(1152921504606846976); /* 2^60 */
561 values
[num_values
++] = val
;
564 * We're now very close to 2^64, so can't add large values anymore. But
565 * add more smaller values to the end, to make sure that all the above
566 * values get flushed and packed into the tree structure.
568 while (num_values
< 1000)
570 val
+= pg_prng_uint32(&pg_global_prng_state
);
571 values
[num_values
++] = val
;
574 /* Create an IntegerSet using these values */
575 intset
= intset_create();
576 for (int i
= 0; i
< num_values
; i
++)
577 intset_add_member(intset
, values
[i
]);
580 * Test intset_is_member() around each of these values
582 for (int i
= 0; i
< num_values
; i
++)
584 uint64 y
= values
[i
];
590 expected
= (values
[i
- 1] == y
- 1);
591 result
= intset_is_member(intset
, y
- 1);
592 if (result
!= expected
)
593 elog(ERROR
, "intset_is_member failed for " UINT64_FORMAT
, y
- 1);
596 result
= intset_is_member(intset
, y
);
598 elog(ERROR
, "intset_is_member failed for " UINT64_FORMAT
, y
);
600 expected
= (i
!= num_values
- 1) ? (values
[i
+ 1] == y
+ 1) : false;
601 result
= intset_is_member(intset
, y
+ 1);
602 if (result
!= expected
)
603 elog(ERROR
, "intset_is_member failed for " UINT64_FORMAT
, y
+ 1);
609 intset_begin_iterate(intset
);
610 for (int i
= 0; i
< num_values
; i
++)
612 found
= intset_iterate_next(intset
, &x
);
613 if (!found
|| x
!= values
[i
])
614 elog(ERROR
, "intset_iterate_next failed for " UINT64_FORMAT
, x
);
616 found
= intset_iterate_next(intset
, &x
);
618 elog(ERROR
, "intset_iterate_next failed " UINT64_FORMAT
, x
);