pcg_extras.hpp

   1 /*
   2  * PCG Random Number Generation for C++
   3  *
   4  * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org>
   5  *
   6  * Licensed under the Apache License, Version 2.0 (the "License");
   7  * you may not use this file except in compliance with the License.
   8  * You may obtain a copy of the License at
   9  *
  10  *     http://www.apache.org/licenses/LICENSE-2.0
  11  *
  12  * Unless required by applicable law or agreed to in writing, software
  13  * distributed under the License is distributed on an "AS IS" BASIS,
  14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15  * See the License for the specific language governing permissions and
  16  * limitations under the License.
  17  *
  18  * For additional information about the PCG random number generation scheme,
  19  * including its license and other licensing options, visit
  20  *
  21  *     http://www.pcg-random.org
  22  */
  23
  24 /*
  25  * This file provides support code that is useful for random-number generation
  26  * but not specific to the PCG generation scheme, including:
  27  *      - 128-bit int support for platforms where it isn't available natively
  28  *      - bit twiddling operations
  29  *      - I/O of 128-bit and 8-bit integers
  30  *      - Handling the evilness of SeedSeq
  31  *      - Support for efficiently producing random numbers less than a given
  32  *        bound
  33  */
  34
  35 #ifndef PCG_EXTRAS_HPP_INCLUDED
  36 #define PCG_EXTRAS_HPP_INCLUDED 1
  37
  38 #include <cinttypes>
  39 #include <cstddef>
  40 #include <cstdlib>
  41 #include <cstring>
  42 #include <cassert>
  43 #include <limits>
  44 #include <iostream>
  45 #include <type_traits>
  46 #include <utility>
  47 #include <locale>
  48 #include <iterator>
  49 #include <utility>
  50
  51 #ifdef __GNUC__
  52     #include <cxxabi.h>
  53 #endif
  54
  55 /*
  56  * Abstractions for compiler-specific directives
  57  */
  58
  59 #ifdef __GNUC__
  60     #define PCG_NOINLINE __attribute__((noinline))
  61 #else
  62     #define PCG_NOINLINE
  63 #endif
  64
  65 /*
  66  * Some members of the PCG library use 128-bit math.  When compiling on 64-bit
  67  * platforms, both GCC and Clang provide 128-bit integer types that are ideal
  68  * for the job.
  69  *
  70  * On 32-bit platforms (or with other compilers), we fall back to a C++
  71  * class that provides 128-bit unsigned integers instead.  It may seem
  72  * like we're reinventing the wheel here, because libraries already exist
  73  * that support large integers, but most existing libraries provide a very
  74  * generic multiprecision code, but here we're operating at a fixed size.
  75  * Also, most other libraries are fairly heavyweight.  So we use a direct
  76  * implementation.  Sadly, it's much slower than hand-coded assembly or
  77  * direct CPU support.
  78  *
  79  */
  80 #if __SIZEOF_INT128__
  81     namespace pcg_extras {
  82         typedef __uint128_t pcg128_t;
  83     }
  84     #define PCG_128BIT_CONSTANT(high,low) \
  85             ((pcg128_t(high) << 64) + low)
  86 #else
  87     #include "pcg_uint128.hpp"
  88     namespace pcg_extras {
  89         typedef pcg_extras::uint_x4<uint32_t,uint64_t> pcg128_t;
  90     }
  91     #define PCG_128BIT_CONSTANT(high,low) \
  92             pcg128_t(high,low)
  93     #define PCG_EMULATED_128BIT_MATH 1
  94 #endif
  95
  96
  97 namespace pcg_extras {
  98
  99 /*
 100  * We often need to represent a "number of bits".  When used normally, these
 101  * numbers are never greater than 128, so an unsigned char is plenty.
 102  * If you're using a nonstandard generator of a larger size, you can set
 103  * PCG_BITCOUNT_T to have it define it as a larger size.  (Some compilers
 104  * might produce faster code if you set it to an unsigned int.)
 105  */
 106
 107 #ifndef PCG_BITCOUNT_T
 108     typedef uint8_t bitcount_t;
 109 #else
 110     typedef PCG_BITCOUNT_T bitcount_t;
 111 #endif
 112
 113 /*
 114  * C++ requires us to be able to serialize RNG state by printing or reading
 115  * it from a stream.  Because we use 128-bit ints, we also need to be able
 116  * ot print them, so here is code to do so.
 117  *
 118  * This code provides enough functionality to print 128-bit ints in decimal
 119  * and zero-padded in hex.  It's not a full-featured implementation.
 120  */
 121
 122 template <typename CharT, typename Traits>
 123 std::basic_ostream<CharT,Traits>&
 124 operator<<(std::basic_ostream<CharT,Traits>& out, pcg128_t value)
 125 {
 126     auto desired_base = out.flags() & out.basefield;
 127     bool want_hex = desired_base == out.hex;
 128
 129     if (want_hex) {
 130         uint64_t highpart = uint64_t(value >> 64);
 131         uint64_t lowpart  = uint64_t(value);
 132         auto desired_width = out.width();
 133         if (desired_width > 16) {
 134             out.width(desired_width - 16);
 135         }
 136         if (highpart != 0 || desired_width > 16)
 137             out << highpart;
 138         CharT oldfill;
 139         if (highpart != 0) {
 140             out.width(16);
 141             oldfill = out.fill('0');
 142         }
 143         auto oldflags = out.setf(decltype(desired_base){}, out.showbase);
 144         out << lowpart;
 145         out.setf(oldflags);
 146         if (highpart != 0) {
 147             out.fill(oldfill);
 148         }
 149         return out;
 150     }
 151     constexpr size_t MAX_CHARS_128BIT = 40;
 152
 153     char buffer[MAX_CHARS_128BIT];
 154     char* pos = buffer+sizeof(buffer);
 155     *(--pos) = '\0';
 156     constexpr auto BASE = pcg128_t(10ULL);
 157     do {
 158         auto div = value / BASE;
 159         auto mod = uint32_t(value - (div * BASE));
 160         *(--pos) = '0' + mod;
 161         value = div;
 162     } while(value != pcg128_t(0ULL));
 163     return out << pos;
 164 }
 165
 166 template <typename CharT, typename Traits>
 167 std::basic_istream<CharT,Traits>&
 168 operator>>(std::basic_istream<CharT,Traits>& in, pcg128_t& value)
 169 {
 170     typename std::basic_istream<CharT,Traits>::sentry s(in);
 171
 172     if (!s)
 173          return in;
 174
 175     constexpr auto BASE = pcg128_t(10ULL);
 176     pcg128_t current(0ULL);
 177     bool did_nothing = true;
 178     bool overflow = false;
 179     for(;;) {
 180         CharT wide_ch = in.get();
 181         if (!in.good())
 182             break;
 183         auto ch = in.narrow(wide_ch, '\0');
 184         if (ch < '0' || ch > '9') {
 185             in.unget();
 186             break;
 187         }
 188         did_nothing = false;
 189         pcg128_t digit(uint32_t(ch - '0'));
 190         pcg128_t timesbase = current*BASE;
 191         overflow = overflow || timesbase < current;
 192         current = timesbase + digit;
 193         overflow = overflow || current < digit;
 194     }
 195
 196     if (did_nothing || overflow) {
 197         in.setstate(std::ios::failbit);
 198         if (overflow)
 199             current = ~pcg128_t(0ULL);
 200     }
 201
 202     value = current;
 203
 204     return in;
 205 }
 206
 207 /*
 208  * Likewise, if people use tiny rngs, we'll be serializing uint8_t.
 209  * If we just used the provided IO operators, they'd read/write chars,
 210  * not ints, so we need to define our own.  We *can* redefine this operator
 211  * here because we're in our own namespace.
 212  */
 213
 214 template <typename CharT, typename Traits>
 215 std::basic_ostream<CharT,Traits>&
 216 operator<<(std::basic_ostream<CharT,Traits>&out, uint8_t value)
 217 {
 218     return out << uint32_t(value);
 219 }
 220
 221 template <typename CharT, typename Traits>
 222 std::basic_istream<CharT,Traits>&
 223 operator>>(std::basic_istream<CharT,Traits>& in, uint8_t& target)
 224 {
 225     uint32_t value = 0xdecea5edU;
 226     in >> value;
 227     if (!in && value == 0xdecea5edU)
 228         return in;
 229     if (value > uint8_t(~0)) {
 230         in.setstate(std::ios::failbit);
 231         value = ~0U;
 232     }
 233     target = uint8_t(value);
 234     return in;
 235 }
 236
 237 /* Unfortunately, the above functions don't get found in preference to the
 238  * built in ones, so we create some more specific overloads that will.
 239  * Ugh.
 240  */
 241
 242 inline std::ostream& operator<<(std::ostream& out, uint8_t value)
 243 {
 244     return pcg_extras::operator<< <char>(out, value);
 245 }
 246
 247 inline std::istream& operator>>(std::istream& in, uint8_t& value)
 248 {
 249     return pcg_extras::operator>> <char>(in, value);
 250 }
 251
 252
 253
 254 /*
 255  * Useful bitwise operations.
 256  */
 257
 258 /*
 259  * XorShifts are invertable, but they are someting of a pain to invert.
 260  * This function backs them out.  It's used by the whacky "inside out"
 261  * generator defined later.
 262  */
 263
 264 template <typename itype>
 265 inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift)
 266 {
 267     if (2*shift >= bits) {
 268         return x ^ (x >> shift);
 269     }
 270     itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1;
 271     itype highmask1 = ~lowmask1;
 272     itype top1 = x;
 273     itype bottom1 = x & lowmask1;
 274     top1 ^= top1 >> shift;
 275     top1 &= highmask1;
 276     x = top1 | bottom1;
 277     itype lowmask2 = (itype(1U) << (bits - shift)) - 1;
 278     itype bottom2 = x & lowmask2;
 279     bottom2 = unxorshift(bottom2, bits - shift, shift);
 280     bottom2 &= lowmask1;
 281     return top1 | bottom2;
 282 }
 283
 284 /*
 285  * Rotate left and right.
 286  *
 287  * In ideal world, compilers would spot idiomatic rotate code and convert it
 288  * to a rotate instruction.  Of course, opinions vary on what the correct
 289  * idiom is and how to spot it.  For clang, sometimes it generates better
 290  * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM.
 291  */
 292
 293 template <typename itype>
 294 inline itype rotl(itype value, bitcount_t rot)
 295 {
 296     constexpr bitcount_t bits = sizeof(itype) * 8;
 297     constexpr bitcount_t mask = bits - 1;
 298 #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
 299     return rot ? (value << rot) | (value >> (bits - rot)) : value;
 300 #else
 301     return (value << rot) | (value >> ((- rot) & mask));
 302 #endif
 303 }
 304
 305 template <typename itype>
 306 inline itype rotr(itype value, bitcount_t rot)
 307 {
 308     constexpr bitcount_t bits = sizeof(itype) * 8;
 309     constexpr bitcount_t mask = bits - 1;
 310 #if PCG_USE_ZEROCHECK_ROTATE_IDIOM
 311     return rot ? (value >> rot) | (value << (bits - rot)) : value;
 312 #else
 313     return (value >> rot) | (value << ((- rot) & mask));
 314 #endif
 315 }
 316
 317 /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes
 318  * to properly recognizing idiomatic rotate code, so for we also provide
 319  * assembler directives (enabled with PCG_USE_INLINE_ASM).  Boo, hiss.
 320  * (I hope that these compilers get better so that this code can die.)
 321  *
 322  * These overloads will be preferred over the general template code above.
 323  */
 324 #if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__  || __i386__)
 325
 326 inline uint8_t rotr(uint8_t value, bitcount_t rot)
 327 {
 328     asm ("rorb   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
 329     return value;
 330 }
 331
 332 inline uint16_t rotr(uint16_t value, bitcount_t rot)
 333 {
 334     asm ("rorw   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
 335     return value;
 336 }
 337
 338 inline uint32_t rotr(uint32_t value, bitcount_t rot)
 339 {
 340     asm ("rorl   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
 341     return value;
 342 }
 343
 344 #if __x86_64__
 345 inline uint64_t rotr(uint64_t value, bitcount_t rot)
 346 {
 347     asm ("rorq   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
 348     return value;
 349 }
 350 #endif // __x86_64__
 351
 352 #endif // PCG_USE_INLINE_ASM
 353
 354
 355 /*
 356  * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of
 357  * 32-bit integers with seed data, but sometimes we want to produce
 358  * larger or smaller integers.
 359  *
 360  * The following code handles this annoyance.
 361  *
 362  * uneven_copy will copy an array of 32-bit ints to an array of larger or
 363  * smaller ints (actually, the code is general it only needing forward
 364  * iterators).  The copy is identical to the one that would be performed if
 365  * we just did memcpy on a standard little-endian machine, but works
 366  * regardless of the endian of the machine (or the weirdness of the ints
 367  * involved).
 368  *
 369  * generate_to initializes an array of integers using a SeedSeq
 370  * object.  It is given the size as a static constant at compile time and
 371  * tries to avoid memory allocation.  If we're filling in 32-bit constants
 372  * we just do it directly.  If we need a separate buffer and it's small,
 373  * we allocate it on the stack.  Otherwise, we fall back to heap allocation.
 374  * Ugh.
 375  *
 376  * generate_one produces a single value of some integral type using a
 377  * SeedSeq object.
 378  */
 379
 380  /* uneven_copy helper, case where destination ints are less than 32 bit. */
 381
 382 template<class SrcIter, class DestIter>
 383 SrcIter uneven_copy_impl(
 384     SrcIter src_first, DestIter dest_first, DestIter dest_last,
 385     std::true_type)
 386 {
 387     typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
 388     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
 389
 390     constexpr bitcount_t SRC_SIZE  = sizeof(src_t);
 391     constexpr bitcount_t DEST_SIZE = sizeof(dest_t);
 392     constexpr bitcount_t DEST_BITS = DEST_SIZE * 8;
 393     constexpr bitcount_t SCALE     = SRC_SIZE / DEST_SIZE;
 394
 395     size_t count = 0;
 396     src_t value;
 397
 398     while (dest_first != dest_last) {
 399         if ((count++ % SCALE) == 0)
 400             value = *src_first++;       // Get more bits
 401         else
 402             value >>= DEST_BITS;        // Move down bits
 403
 404         *dest_first++ = dest_t(value);  // Truncates, ignores high bits.
 405     }
 406     return src_first;
 407 }
 408
 409  /* uneven_copy helper, case where destination ints are more than 32 bit. */
 410
 411 template<class SrcIter, class DestIter>
 412 SrcIter uneven_copy_impl(
 413     SrcIter src_first, DestIter dest_first, DestIter dest_last,
 414     std::false_type)
 415 {
 416     typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
 417     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
 418
 419     constexpr auto SRC_SIZE  = sizeof(src_t);
 420     constexpr auto SRC_BITS  = SRC_SIZE * 8;
 421     constexpr auto DEST_SIZE = sizeof(dest_t);
 422     constexpr auto SCALE     = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE;
 423
 424     while (dest_first != dest_last) {
 425         dest_t value(0UL);
 426         unsigned int shift = 0;
 427
 428         for (size_t i = 0; i < SCALE; ++i) {
 429             value |= dest_t(*src_first++) << shift;
 430             shift += SRC_BITS;
 431         }
 432
 433         *dest_first++ = value;
 434     }
 435     return src_first;
 436 }
 437
 438 /* uneven_copy, call the right code for larger vs. smaller */
 439
 440 template<class SrcIter, class DestIter>
 441 inline SrcIter uneven_copy(SrcIter src_first,
 442                            DestIter dest_first, DestIter dest_last)
 443 {
 444     typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
 445     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
 446
 447     constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t);
 448
 449     return uneven_copy_impl(src_first, dest_first, dest_last,
 450                             std::integral_constant<bool, DEST_IS_SMALLER>{});
 451 }
 452
 453 /* generate_to, fill in a fixed-size array of integral type using a SeedSeq
 454  * (actually works for any random-access iterator)
 455  */
 456
 457 template <size_t size, typename SeedSeq, typename DestIter>
 458 inline void generate_to_impl(SeedSeq&& generator, DestIter dest,
 459                              std::true_type)
 460 {
 461     generator.generate(dest, dest+size);
 462 }
 463
 464 template <size_t size, typename SeedSeq, typename DestIter>
 465 void generate_to_impl(SeedSeq&& generator, DestIter dest,
 466                       std::false_type)
 467 {
 468     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
 469     constexpr auto DEST_SIZE = sizeof(dest_t);
 470     constexpr auto GEN_SIZE  = sizeof(uint32_t);
 471
 472     constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE;
 473     constexpr size_t FROM_ELEMS =
 474         GEN_IS_SMALLER
 475             ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE)
 476             : (size + (GEN_SIZE / DEST_SIZE) - 1)
 477                 / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER);
 478                         //  this odd code ^^^^^^^^^^^^^^^^^ is work-around for
 479                         //  a bug: http://llvm.org/bugs/show_bug.cgi?id=21287
 480
 481     if (FROM_ELEMS <= 1024) {
 482         uint32_t buffer[FROM_ELEMS];
 483         generator.generate(buffer, buffer+FROM_ELEMS);
 484         uneven_copy(buffer, dest, dest+size);
 485     } else {
 486         uint32_t* buffer = (uint32_t*) malloc(GEN_SIZE * FROM_ELEMS);
 487         generator.generate(buffer, buffer+FROM_ELEMS);
 488         uneven_copy(buffer, dest, dest+size);
 489         free(buffer);
 490     }
 491 }
 492
 493 template <size_t size, typename SeedSeq, typename DestIter>
 494 inline void generate_to(SeedSeq&& generator, DestIter dest)
 495 {
 496     typedef typename std::iterator_traits<DestIter>::value_type dest_t;
 497     constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t);
 498
 499     generate_to_impl<size>(std::forward<SeedSeq>(generator), dest,
 500                            std::integral_constant<bool, IS_32BIT>{});
 501 }
 502
 503 /* generate_one, produce a value of integral type using a SeedSeq
 504  * (optionally, we can have it produce more than one and pick which one
 505  * we want)
 506  */
 507
 508 template <typename UInt, size_t i = 0UL, size_t N = i+1UL, typename SeedSeq>
 509 inline UInt generate_one(SeedSeq&& generator)
 510 {
 511     UInt result[N];
 512     generate_to<N>(std::forward<SeedSeq>(generator), result);
 513     return result[i];
 514 }
 515
 516 template <typename RngType>
 517 auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
 518         -> typename RngType::result_type
 519 {
 520     typedef typename RngType::result_type rtype;
 521     rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound)
 522                     % upper_bound;
 523     for (;;) {
 524         rtype r = rng() - RngType::min();
 525         if (r >= threshold)
 526             return r % upper_bound;
 527     }
 528 }
 529
 530 template <typename Iter, typename RandType>
 531 void shuffle(Iter from, Iter to, RandType&& rng)
 532 {
 533     typedef typename std::iterator_traits<Iter>::difference_type delta_t;
 534     auto count = to - from;
 535     while (count > 1) {
 536         delta_t chosen(bounded_rand(rng, count));
 537         --count;
 538         --to;
 539         using std::swap;
 540         swap(*(from+chosen), *to);
 541     }
 542 }
 543
 544 /*
 545  * Although std::seed_seq is useful, it isn't everything.  Often we want to
 546  * initialize a random-number generator some other way, such as from a random
 547  * device.
 548  *
 549  * Technically, it does not meet the requirements of a SeedSequence because
 550  * it lacks some of the rarely-used member functions (some of which would
 551  * be impossible to provide).  However the C++ standard is quite specific
 552  * that actual engines only called the generate method, so it ought not to be
 553  * a problem in practice.
 554  */
 555
 556 template <typename RngType>
 557 class seed_seq_from {
 558 private:
 559     RngType rng_;
 560
 561     typedef uint_least32_t result_type;
 562
 563 public:
 564     template<typename... Args>
 565     seed_seq_from(Args&&... args) :
 566         rng_(std::forward<Args>(args)...)
 567     {
 568         // Nothing (else) to do...
 569     }
 570
 571     template<typename Iter>
 572     void generate(Iter start, Iter finish)
 573     {
 574         for (auto i = start; i != finish; ++i)
 575             *i = result_type(rng_());
 576     }
 577
 578     constexpr size_t size() const
 579     {
 580         return (sizeof(typename RngType::result_type) > sizeof(result_type)
 581                 && RngType::max() > ~size_t(0UL))
 582              ? ~size_t(0UL)
 583              : size_t(RngType::max());
 584     }
 585 };
 586
 587 /*
 588  * Sometimes you might want a distinct seed based on when the program
 589  * was compiled.  That way, a particular instance of the program will
 590  * behave the same way, but when recompiled it'll produce a different
 591  * value.
 592  */
 593
 594 template <typename IntType>
 595 struct static_arbitrary_seed {
 596 private:
 597     static constexpr IntType fnv(IntType hash, const char* pos) {
 598         return *pos == '\0'
 599              ? hash
 600              : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1));
 601     }
 602
 603 public:
 604     static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)),
 605                         __DATE__ __TIME__ __FILE__);
 606 };
 607
 608 // Sometimes, when debugging or testing, it's handy to be able print the name
 609 // of a (in human-readable form).  This code allows the idiom:
 610 //
 611 //      cout << printable_typename<my_foo_type_t>()
 612 //
 613 // to print out my_foo_type_t (or its concrete type if it is a synonym)
 614
 615 template <typename T>
 616 struct printable_typename {};
 617
 618 template <typename T>
 619 std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
 620     const char *implementation_typename = typeid(T).name();
 621 #ifdef __GNUC__
 622     int status;
 623     const char* pretty_name =
 624         abi::__cxa_demangle(implementation_typename, NULL, NULL, &status);
 625     if (status == 0)
 626         out << pretty_name;
 627     free((void*) pretty_name);
 628     if (status == 0)
 629         return out;
 630 #endif
 631     out << implementation_typename;
 632     return out;
 633 }
 634
 635 } // namespace pcg_extras
 636
 637 #endif // PCG_EXTRAS_HPP_INCLUDED