2 * PCG Random Number Generation for C++
4 * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org>
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 * For additional information about the PCG random number generation scheme,
19 * including its license and other licensing options, visit
21 * http://www.pcg-random.org
25 * This file provides support code that is useful for random-number generation
26 * but not specific to the PCG generation scheme, including:
27 * - 128-bit int support for platforms where it isn't available natively
28 * - bit twiddling operations
29 * - I/O of 128-bit and 8-bit integers
30 * - Handling the evilness of SeedSeq
31 * - Support for efficiently producing random numbers less than a given
35 #ifndef PCG_EXTRAS_HPP_INCLUDED
36 #define PCG_EXTRAS_HPP_INCLUDED 1
45 #include <type_traits>
56 * Abstractions for compiler-specific directives
60 #define PCG_NOINLINE __attribute__((noinline))
66 * Some members of the PCG library use 128-bit math. When compiling on 64-bit
67 * platforms, both GCC and Clang provide 128-bit integer types that are ideal
70 * On 32-bit platforms (or with other compilers), we fall back to a C++
71 * class that provides 128-bit unsigned integers instead. It may seem
72 * like we're reinventing the wheel here, because libraries already exist
73 * that support large integers, but most existing libraries provide a very
74 * generic multiprecision code, but here we're operating at a fixed size.
75 * Also, most other libraries are fairly heavyweight. So we use a direct
76 * implementation. Sadly, it's much slower than hand-coded assembly or
81 namespace pcg_extras
{
82 typedef __uint128_t pcg128_t
;
84 #define PCG_128BIT_CONSTANT(high,low) \
85 ((pcg128_t(high) << 64) + low)
87 #include "pcg_uint128.hpp"
88 namespace pcg_extras
{
89 typedef pcg_extras::uint_x4
<uint32_t,uint64_t> pcg128_t
;
91 #define PCG_128BIT_CONSTANT(high,low) \
93 #define PCG_EMULATED_128BIT_MATH 1
97 namespace pcg_extras
{
100 * We often need to represent a "number of bits". When used normally, these
101 * numbers are never greater than 128, so an unsigned char is plenty.
102 * If you're using a nonstandard generator of a larger size, you can set
103 * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers
104 * might produce faster code if you set it to an unsigned int.)
107 #ifndef PCG_BITCOUNT_T
108 typedef uint8_t bitcount_t
;
110 typedef PCG_BITCOUNT_T bitcount_t
;
114 * C++ requires us to be able to serialize RNG state by printing or reading
115 * it from a stream. Because we use 128-bit ints, we also need to be able
116 * ot print them, so here is code to do so.
118 * This code provides enough functionality to print 128-bit ints in decimal
119 * and zero-padded in hex. It's not a full-featured implementation.
122 template <typename CharT
, typename Traits
>
123 std::basic_ostream
<CharT
,Traits
>&
124 operator<<(std::basic_ostream
<CharT
,Traits
>& out
, pcg128_t value
)
126 auto desired_base
= out
.flags() & out
.basefield
;
127 bool want_hex
= desired_base
== out
.hex
;
130 uint64_t highpart
= uint64_t(value
>> 64);
131 uint64_t lowpart
= uint64_t(value
);
132 auto desired_width
= out
.width();
133 if (desired_width
> 16) {
134 out
.width(desired_width
- 16);
136 if (highpart
!= 0 || desired_width
> 16)
141 oldfill
= out
.fill('0');
143 auto oldflags
= out
.setf(decltype(desired_base
){}, out
.showbase
);
151 constexpr size_t MAX_CHARS_128BIT
= 40;
153 char buffer
[MAX_CHARS_128BIT
];
154 char* pos
= buffer
+sizeof(buffer
);
156 constexpr auto BASE
= pcg128_t(10ULL);
158 auto div
= value
/ BASE
;
159 auto mod
= uint32_t(value
- (div
* BASE
));
160 *(--pos
) = '0' + mod
;
162 } while(value
!= pcg128_t(0ULL));
166 template <typename CharT
, typename Traits
>
167 std::basic_istream
<CharT
,Traits
>&
168 operator>>(std::basic_istream
<CharT
,Traits
>& in
, pcg128_t
& value
)
170 typename
std::basic_istream
<CharT
,Traits
>::sentry
s(in
);
175 constexpr auto BASE
= pcg128_t(10ULL);
176 pcg128_t
current(0ULL);
177 bool did_nothing
= true;
178 bool overflow
= false;
180 CharT wide_ch
= in
.get();
183 auto ch
= in
.narrow(wide_ch
, '\0');
184 if (ch
< '0' || ch
> '9') {
189 pcg128_t
digit(uint32_t(ch
- '0'));
190 pcg128_t timesbase
= current
*BASE
;
191 overflow
= overflow
|| timesbase
< current
;
192 current
= timesbase
+ digit
;
193 overflow
= overflow
|| current
< digit
;
196 if (did_nothing
|| overflow
) {
197 in
.setstate(std::ios::failbit
);
199 current
= ~pcg128_t(0ULL);
208 * Likewise, if people use tiny rngs, we'll be serializing uint8_t.
209 * If we just used the provided IO operators, they'd read/write chars,
210 * not ints, so we need to define our own. We *can* redefine this operator
211 * here because we're in our own namespace.
214 template <typename CharT
, typename Traits
>
215 std::basic_ostream
<CharT
,Traits
>&
216 operator<<(std::basic_ostream
<CharT
,Traits
>&out
, uint8_t value
)
218 return out
<< uint32_t(value
);
221 template <typename CharT
, typename Traits
>
222 std::basic_istream
<CharT
,Traits
>&
223 operator>>(std::basic_istream
<CharT
,Traits
>& in
, uint8_t& target
)
225 uint32_t value
= 0xdecea5edU
;
227 if (!in
&& value
== 0xdecea5edU
)
229 if (value
> uint8_t(~0)) {
230 in
.setstate(std::ios::failbit
);
233 target
= uint8_t(value
);
237 /* Unfortunately, the above functions don't get found in preference to the
238 * built in ones, so we create some more specific overloads that will.
242 inline std::ostream
& operator<<(std::ostream
& out
, uint8_t value
)
244 return pcg_extras::operator<< <char>(out
, value
);
247 inline std::istream
& operator>>(std::istream
& in
, uint8_t& value
)
249 return pcg_extras::operator>> <char>(in
, value
);
255 * Useful bitwise operations.
259 * XorShifts are invertable, but they are someting of a pain to invert.
260 * This function backs them out. It's used by the whacky "inside out"
261 * generator defined later.
264 template <typename itype
>
265 inline itype
unxorshift(itype x
, bitcount_t bits
, bitcount_t shift
)
267 if (2*shift
>= bits
) {
268 return x
^ (x
>> shift
);
270 itype lowmask1
= (itype(1U) << (bits
- shift
*2)) - 1;
271 itype highmask1
= ~lowmask1
;
273 itype bottom1
= x
& lowmask1
;
274 top1
^= top1
>> shift
;
277 itype lowmask2
= (itype(1U) << (bits
- shift
)) - 1;
278 itype bottom2
= x
& lowmask2
;
279 bottom2
= unxorshift(bottom2
, bits
- shift
, shift
);
281 return top1
| bottom2
;
285 * Rotate left and right.
287 * In ideal world, compilers would spot idiomatic rotate code and convert it
288 * to a rotate instruction. Of course, opinions vary on what the correct
289 * idiom is and how to spot it. For clang, sometimes it generates better
290 * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM.
293 template <typename itype
>
294 inline itype
rotl(itype value
, bitcount_t rot
)
296 constexpr bitcount_t bits
= sizeof(itype
) * 8;
297 constexpr bitcount_t mask
= bits
- 1;
298 #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
299 return rot
? (value
<< rot
) | (value
>> (bits
- rot
)) : value
;
301 return (value
<< rot
) | (value
>> ((- rot
) & mask
));
305 template <typename itype
>
306 inline itype
rotr(itype value
, bitcount_t rot
)
308 constexpr bitcount_t bits
= sizeof(itype
) * 8;
309 constexpr bitcount_t mask
= bits
- 1;
310 #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
311 return rot
? (value
>> rot
) | (value
<< (bits
- rot
)) : value
;
313 return (value
>> rot
) | (value
<< ((- rot
) & mask
));
317 /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes
318 * to properly recognizing idiomatic rotate code, so for we also provide
319 * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss.
320 * (I hope that these compilers get better so that this code can die.)
322 * These overloads will be preferred over the general template code above.
324 #if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__)
326 inline uint8_t rotr(uint8_t value
, bitcount_t rot
)
328 asm ("rorb %%cl, %0" : "=r" (value
) : "0" (value
), "c" (rot
));
332 inline uint16_t rotr(uint16_t value
, bitcount_t rot
)
334 asm ("rorw %%cl, %0" : "=r" (value
) : "0" (value
), "c" (rot
));
338 inline uint32_t rotr(uint32_t value
, bitcount_t rot
)
340 asm ("rorl %%cl, %0" : "=r" (value
) : "0" (value
), "c" (rot
));
345 inline uint64_t rotr(uint64_t value
, bitcount_t rot
)
347 asm ("rorq %%cl, %0" : "=r" (value
) : "0" (value
), "c" (rot
));
352 #endif // PCG_USE_INLINE_ASM
356 * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of
357 * 32-bit integers with seed data, but sometimes we want to produce
358 * larger or smaller integers.
360 * The following code handles this annoyance.
362 * uneven_copy will copy an array of 32-bit ints to an array of larger or
363 * smaller ints (actually, the code is general it only needing forward
364 * iterators). The copy is identical to the one that would be performed if
365 * we just did memcpy on a standard little-endian machine, but works
366 * regardless of the endian of the machine (or the weirdness of the ints
369 * generate_to initializes an array of integers using a SeedSeq
370 * object. It is given the size as a static constant at compile time and
371 * tries to avoid memory allocation. If we're filling in 32-bit constants
372 * we just do it directly. If we need a separate buffer and it's small,
373 * we allocate it on the stack. Otherwise, we fall back to heap allocation.
376 * generate_one produces a single value of some integral type using a
380 /* uneven_copy helper, case where destination ints are less than 32 bit. */
382 template<class SrcIter
, class DestIter
>
383 SrcIter
uneven_copy_impl(
384 SrcIter src_first
, DestIter dest_first
, DestIter dest_last
,
387 typedef typename
std::iterator_traits
<SrcIter
>::value_type src_t
;
388 typedef typename
std::iterator_traits
<DestIter
>::value_type dest_t
;
390 constexpr bitcount_t SRC_SIZE
= sizeof(src_t
);
391 constexpr bitcount_t DEST_SIZE
= sizeof(dest_t
);
392 constexpr bitcount_t DEST_BITS
= DEST_SIZE
* 8;
393 constexpr bitcount_t SCALE
= SRC_SIZE
/ DEST_SIZE
;
398 while (dest_first
!= dest_last
) {
399 if ((count
++ % SCALE
) == 0)
400 value
= *src_first
++; // Get more bits
402 value
>>= DEST_BITS
; // Move down bits
404 *dest_first
++ = dest_t(value
); // Truncates, ignores high bits.
409 /* uneven_copy helper, case where destination ints are more than 32 bit. */
411 template<class SrcIter
, class DestIter
>
412 SrcIter
uneven_copy_impl(
413 SrcIter src_first
, DestIter dest_first
, DestIter dest_last
,
416 typedef typename
std::iterator_traits
<SrcIter
>::value_type src_t
;
417 typedef typename
std::iterator_traits
<DestIter
>::value_type dest_t
;
419 constexpr auto SRC_SIZE
= sizeof(src_t
);
420 constexpr auto SRC_BITS
= SRC_SIZE
* 8;
421 constexpr auto DEST_SIZE
= sizeof(dest_t
);
422 constexpr auto SCALE
= (DEST_SIZE
+SRC_SIZE
-1) / SRC_SIZE
;
424 while (dest_first
!= dest_last
) {
426 unsigned int shift
= 0;
428 for (size_t i
= 0; i
< SCALE
; ++i
) {
429 value
|= dest_t(*src_first
++) << shift
;
433 *dest_first
++ = value
;
438 /* uneven_copy, call the right code for larger vs. smaller */
440 template<class SrcIter
, class DestIter
>
441 inline SrcIter
uneven_copy(SrcIter src_first
,
442 DestIter dest_first
, DestIter dest_last
)
444 typedef typename
std::iterator_traits
<SrcIter
>::value_type src_t
;
445 typedef typename
std::iterator_traits
<DestIter
>::value_type dest_t
;
447 constexpr bool DEST_IS_SMALLER
= sizeof(dest_t
) < sizeof(src_t
);
449 return uneven_copy_impl(src_first
, dest_first
, dest_last
,
450 std::integral_constant
<bool, DEST_IS_SMALLER
>{});
453 /* generate_to, fill in a fixed-size array of integral type using a SeedSeq
454 * (actually works for any random-access iterator)
457 template <size_t size
, typename SeedSeq
, typename DestIter
>
458 inline void generate_to_impl(SeedSeq
&& generator
, DestIter dest
,
461 generator
.generate(dest
, dest
+size
);
464 template <size_t size
, typename SeedSeq
, typename DestIter
>
465 void generate_to_impl(SeedSeq
&& generator
, DestIter dest
,
468 typedef typename
std::iterator_traits
<DestIter
>::value_type dest_t
;
469 constexpr auto DEST_SIZE
= sizeof(dest_t
);
470 constexpr auto GEN_SIZE
= sizeof(uint32_t);
472 constexpr bool GEN_IS_SMALLER
= GEN_SIZE
< DEST_SIZE
;
473 constexpr size_t FROM_ELEMS
=
475 ? size
* ((DEST_SIZE
+GEN_SIZE
-1) / GEN_SIZE
)
476 : (size
+ (GEN_SIZE
/ DEST_SIZE
) - 1)
477 / ((GEN_SIZE
/ DEST_SIZE
) + GEN_IS_SMALLER
);
478 // this odd code ^^^^^^^^^^^^^^^^^ is work-around for
479 // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287
481 if (FROM_ELEMS
<= 1024) {
482 uint32_t buffer
[FROM_ELEMS
];
483 generator
.generate(buffer
, buffer
+FROM_ELEMS
);
484 uneven_copy(buffer
, dest
, dest
+size
);
486 uint32_t* buffer
= (uint32_t*) malloc(GEN_SIZE
* FROM_ELEMS
);
487 generator
.generate(buffer
, buffer
+FROM_ELEMS
);
488 uneven_copy(buffer
, dest
, dest
+size
);
493 template <size_t size
, typename SeedSeq
, typename DestIter
>
494 inline void generate_to(SeedSeq
&& generator
, DestIter dest
)
496 typedef typename
std::iterator_traits
<DestIter
>::value_type dest_t
;
497 constexpr bool IS_32BIT
= sizeof(dest_t
) == sizeof(uint32_t);
499 generate_to_impl
<size
>(std::forward
<SeedSeq
>(generator
), dest
,
500 std::integral_constant
<bool, IS_32BIT
>{});
503 /* generate_one, produce a value of integral type using a SeedSeq
504 * (optionally, we can have it produce more than one and pick which one
508 template <typename UInt
, size_t i
= 0UL, size_t N
= i
+1UL, typename SeedSeq
>
509 inline UInt
generate_one(SeedSeq
&& generator
)
512 generate_to
<N
>(std::forward
<SeedSeq
>(generator
), result
);
516 template <typename RngType
>
517 auto bounded_rand(RngType
& rng
, typename
RngType::result_type upper_bound
)
518 -> typename
RngType::result_type
520 typedef typename
RngType::result_type rtype
;
521 rtype threshold
= (RngType::max() - RngType::min() + rtype(1) - upper_bound
)
524 rtype r
= rng() - RngType::min();
526 return r
% upper_bound
;
530 template <typename Iter
, typename RandType
>
531 void shuffle(Iter from
, Iter to
, RandType
&& rng
)
533 typedef typename
std::iterator_traits
<Iter
>::difference_type delta_t
;
534 auto count
= to
- from
;
536 delta_t
chosen(bounded_rand(rng
, count
));
540 swap(*(from
+chosen
), *to
);
545 * Although std::seed_seq is useful, it isn't everything. Often we want to
546 * initialize a random-number generator some other way, such as from a random
549 * Technically, it does not meet the requirements of a SeedSequence because
550 * it lacks some of the rarely-used member functions (some of which would
551 * be impossible to provide). However the C++ standard is quite specific
552 * that actual engines only called the generate method, so it ought not to be
553 * a problem in practice.
556 template <typename RngType
>
557 class seed_seq_from
{
561 typedef uint_least32_t result_type
;
564 template<typename
... Args
>
565 seed_seq_from(Args
&&... args
) :
566 rng_(std::forward
<Args
>(args
)...)
568 // Nothing (else) to do...
571 template<typename Iter
>
572 void generate(Iter start
, Iter finish
)
574 for (auto i
= start
; i
!= finish
; ++i
)
575 *i
= result_type(rng_());
578 constexpr size_t size() const
580 return (sizeof(typename
RngType::result_type
) > sizeof(result_type
)
581 && RngType::max() > ~size_t(0UL))
583 : size_t(RngType::max());
588 * Sometimes you might want a distinct seed based on when the program
589 * was compiled. That way, a particular instance of the program will
590 * behave the same way, but when recompiled it'll produce a different
594 template <typename IntType
>
595 struct static_arbitrary_seed
{
597 static constexpr IntType
fnv(IntType hash
, const char* pos
) {
600 : fnv((hash
* IntType(16777619U)) ^ *pos
, (pos
+1));
604 static constexpr IntType value
= fnv(IntType(2166136261U ^ sizeof(IntType
)),
605 __DATE__ __TIME__ __FILE__
);
608 // Sometimes, when debugging or testing, it's handy to be able print the name
609 // of a (in human-readable form). This code allows the idiom:
611 // cout << printable_typename<my_foo_type_t>()
613 // to print out my_foo_type_t (or its concrete type if it is a synonym)
615 template <typename T
>
616 struct printable_typename
{};
618 template <typename T
>
619 std::ostream
& operator<<(std::ostream
& out
, printable_typename
<T
>) {
620 const char *implementation_typename
= typeid(T
).name();
623 const char* pretty_name
=
624 abi::__cxa_demangle(implementation_typename
, NULL
, NULL
, &status
);
627 free((void*) pretty_name
);
631 out
<< implementation_typename
;
635 } // namespace pcg_extras
637 #endif // PCG_EXTRAS_HPP_INCLUDED