Lib/random.py

   1 """Random variable generators.
   2
   3     integers
   4     --------
   5            uniform within range
   6
   7     sequences
   8     ---------
   9            pick random element
  10            generate random permutation
  11
  12     distributions on the real line:
  13     ------------------------------
  14            uniform
  15            normal (Gaussian)
  16            lognormal
  17            negative exponential
  18            gamma
  19            beta
  20
  21     distributions on the circle (angles 0 to 2pi)
  22     ---------------------------------------------
  23            circular uniform
  24            von Mises
  25
  26 Translated from anonymously contributed C/C++ source.
  27
  28 Multi-threading note:  the random number generator used here is not thread-
  29 safe; it is possible that two calls return the same random value.  However,
  30 you can instantiate a different instance of Random() in each thread to get
  31 generators that don't share state, then use .setstate() and .jumpahead() to
  32 move the generators to disjoint segments of the full period.  For example,
  33
  34 def create_generators(num, delta, firstseed=None):
  35     ""\"Return list of num distinct generators.
  36     Each generator has its own unique segment of delta elements from
  37     Random.random()'s full period.
  38     Seed the first generator with optional arg firstseed (default is
  39     None, to seed from current time).
  40     ""\"
  41
  42     from random import Random
  43     g = Random(firstseed)
  44     result = [g]
  45     for i in range(num - 1):
  46         laststate = g.getstate()
  47         g = Random()
  48         g.setstate(laststate)
  49         g.jumpahead(delta)
  50         result.append(g)
  51     return result
  52
  53 gens = create_generators(10, 1000000)
  54
  55 That creates 10 distinct generators, which can be passed out to 10 distinct
  56 threads.  The generators don't share state so can be called safely in
  57 parallel.  So long as no thread calls its g.random() more than a million
  58 times (the second argument to create_generators), the sequences seen by
  59 each thread will not overlap.
  60
  61 The period of the underlying Wichmann-Hill generator is 6,953,607,871,644,
  62 and that limits how far this technique can be pushed.
  63
  64 Just for fun, note that since we know the period, .jumpahead() can also be
  65 used to "move backward in time":
  66
  67 >>> g = Random(42)  # arbitrary
  68 >>> g.random()
  69 0.25420336316883324
  70 >>> g.jumpahead(6953607871644L - 1) # move *back* one
  71 >>> g.random()
  72 0.25420336316883324
  73 """
  74 # XXX The docstring sucks.
  75
  76 from math import log as _log, exp as _exp, pi as _pi, e as _e
  77 from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
  78
  79 __all__ = ["Random","seed","random","uniform","randint","choice",
  80            "randrange","shuffle","normalvariate","lognormvariate",
  81            "cunifvariate","expovariate","vonmisesvariate","gammavariate",
  82            "stdgamma","gauss","betavariate","paretovariate","weibullvariate",
  83            "getstate","setstate","jumpahead","whseed"]
  84
  85 def _verify(name, expected):
  86     computed = eval(name)
  87     if abs(computed - expected) > 1e-7:
  88         raise ValueError(
  89             "computed value for %s deviates too much "
  90             "(computed %g, expected %g)" % (name, computed, expected))
  91
  92 NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0)
  93 _verify('NV_MAGICCONST', 1.71552776992141)
  94
  95 TWOPI = 2.0*_pi
  96 _verify('TWOPI', 6.28318530718)
  97
  98 LOG4 = _log(4.0)
  99 _verify('LOG4', 1.38629436111989)
 100
 101 SG_MAGICCONST = 1.0 + _log(4.5)
 102 _verify('SG_MAGICCONST', 2.50407739677627)
 103
 104 del _verify
 105
 106 # Translated by Guido van Rossum from C source provided by
 107 # Adrian Baddeley.
 108
 109 class Random:
 110
 111     VERSION = 1     # used by getstate/setstate
 112
 113     def __init__(self, x=None):
 114         """Initialize an instance.
 115
 116         Optional argument x controls seeding, as for Random.seed().
 117         """
 118
 119         self.seed(x)
 120         self.gauss_next = None
 121
 122 ## -------------------- core generator -------------------
 123
 124     # Specific to Wichmann-Hill generator.  Subclasses wishing to use a
 125     # different core generator should override the seed(), random(),
 126     # getstate(), setstate() and jumpahead() methods.
 127
 128     def seed(self, a=None):
 129         """Initialize internal state from hashable object.
 130
 131         None or no argument seeds from current time.
 132
 133         If a is not None or an int or long, hash(a) is used instead.
 134
 135         If a is an int or long, a is used directly.  Distinct values between
 136         0 and 27814431486575L inclusive are guaranteed to yield distinct
 137         internal states (this guarantee is specific to the default
 138         Wichmann-Hill generator).
 139         """
 140
 141         if a is None:
 142             # Initialize from current time
 143             import time
 144             a = long(time.time() * 256)
 145
 146         if type(a) not in (type(3), type(3L)):
 147             a = hash(a)
 148
 149         a, x = divmod(a, 30268)
 150         a, y = divmod(a, 30306)
 151         a, z = divmod(a, 30322)
 152         self._seed = int(x)+1, int(y)+1, int(z)+1
 153
 154     def random(self):
 155         """Get the next random number in the range [0.0, 1.0)."""
 156
 157         # Wichman-Hill random number generator.
 158         #
 159         # Wichmann, B. A. & Hill, I. D. (1982)
 160         # Algorithm AS 183:
 161         # An efficient and portable pseudo-random number generator
 162         # Applied Statistics 31 (1982) 188-190
 163         #
 164         # see also:
 165         #        Correction to Algorithm AS 183
 166         #        Applied Statistics 33 (1984) 123
 167         #
 168         #        McLeod, A. I. (1985)
 169         #        A remark on Algorithm AS 183
 170         #        Applied Statistics 34 (1985),198-200
 171
 172         # This part is thread-unsafe:
 173         # BEGIN CRITICAL SECTION
 174         x, y, z = self._seed
 175         x = (171 * x) % 30269
 176         y = (172 * y) % 30307
 177         z = (170 * z) % 30323
 178         self._seed = x, y, z
 179         # END CRITICAL SECTION
 180
 181         # Note:  on a platform using IEEE-754 double arithmetic, this can
 182         # never return 0.0 (asserted by Tim; proof too long for a comment).
 183         return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0
 184
 185     def getstate(self):
 186         """Return internal state; can be passed to setstate() later."""
 187         return self.VERSION, self._seed, self.gauss_next
 188
 189     def setstate(self, state):
 190         """Restore internal state from object returned by getstate()."""
 191         version = state[0]
 192         if version == 1:
 193             version, self._seed, self.gauss_next = state
 194         else:
 195             raise ValueError("state with version %s passed to "
 196                              "Random.setstate() of version %s" %
 197                              (version, self.VERSION))
 198
 199     def jumpahead(self, n):
 200         """Act as if n calls to random() were made, but quickly.
 201
 202         n is an int, greater than or equal to 0.
 203
 204         Example use:  If you have 2 threads and know that each will
 205         consume no more than a million random numbers, create two Random
 206         objects r1 and r2, then do
 207             r2.setstate(r1.getstate())
 208             r2.jumpahead(1000000)
 209         Then r1 and r2 will use guaranteed-disjoint segments of the full
 210         period.
 211         """
 212
 213         if not n >= 0:
 214             raise ValueError("n must be >= 0")
 215         x, y, z = self._seed
 216         x = int(x * pow(171, n, 30269)) % 30269
 217         y = int(y * pow(172, n, 30307)) % 30307
 218         z = int(z * pow(170, n, 30323)) % 30323
 219         self._seed = x, y, z
 220
 221     def __whseed(self, x=0, y=0, z=0):
 222         """Set the Wichmann-Hill seed from (x, y, z).
 223
 224         These must be integers in the range [0, 256).
 225         """
 226
 227         if not type(x) == type(y) == type(z) == type(0):
 228             raise TypeError('seeds must be integers')
 229         if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256):
 230             raise ValueError('seeds must be in range(0, 256)')
 231         if 0 == x == y == z:
 232             # Initialize from current time
 233             import time
 234             t = long(time.time() * 256)
 235             t = int((t&0xffffff) ^ (t>>24))
 236             t, x = divmod(t, 256)
 237             t, y = divmod(t, 256)
 238             t, z = divmod(t, 256)
 239         # Zero is a poor seed, so substitute 1
 240         self._seed = (x or 1, y or 1, z or 1)
 241
 242     def whseed(self, a=None):
 243         """Seed from hashable object's hash code.
 244
 245         None or no argument seeds from current time.  It is not guaranteed
 246         that objects with distinct hash codes lead to distinct internal
 247         states.
 248
 249         This is obsolete, provided for compatibility with the seed routine
 250         used prior to Python 2.1.  Use the .seed() method instead.
 251         """
 252
 253         if a is None:
 254             self.__whseed()
 255             return
 256         a = hash(a)
 257         a, x = divmod(a, 256)
 258         a, y = divmod(a, 256)
 259         a, z = divmod(a, 256)
 260         x = (x + a) % 256 or 1
 261         y = (y + a) % 256 or 1
 262         z = (z + a) % 256 or 1
 263         self.__whseed(x, y, z)
 264
 265 ## ---- Methods below this point do not need to be overridden when
 266 ## ---- subclassing for the purpose of using a different core generator.
 267
 268 ## -------------------- pickle support  -------------------
 269
 270     def __getstate__(self): # for pickle
 271         return self.getstate()
 272
 273     def __setstate__(self, state):  # for pickle
 274         self.setstate(state)
 275
 276 ## -------------------- integer methods  -------------------
 277
 278     def randrange(self, start, stop=None, step=1, int=int, default=None):
 279         """Choose a random item from range(start, stop[, step]).
 280
 281         This fixes the problem with randint() which includes the
 282         endpoint; in Python this is usually not what you want.
 283         Do not supply the 'int' and 'default' arguments.
 284         """
 285
 286         # This code is a bit messy to make it fast for the
 287         # common case while still doing adequate error checking
 288         istart = int(start)
 289         if istart != start:
 290             raise ValueError, "non-integer arg 1 for randrange()"
 291         if stop is default:
 292             if istart > 0:
 293                 return int(self.random() * istart)
 294             raise ValueError, "empty range for randrange()"
 295         istop = int(stop)
 296         if istop != stop:
 297             raise ValueError, "non-integer stop for randrange()"
 298         if step == 1:
 299             if istart < istop:
 300                 return istart + int(self.random() *
 301                                    (istop - istart))
 302             raise ValueError, "empty range for randrange()"
 303         istep = int(step)
 304         if istep != step:
 305             raise ValueError, "non-integer step for randrange()"
 306         if istep > 0:
 307             n = (istop - istart + istep - 1) / istep
 308         elif istep < 0:
 309             n = (istop - istart + istep + 1) / istep
 310         else:
 311             raise ValueError, "zero step for randrange()"
 312
 313         if n <= 0:
 314             raise ValueError, "empty range for randrange()"
 315         return istart + istep*int(self.random() * n)
 316
 317     def randint(self, a, b):
 318         """Return random integer in range [a, b], including both end points.
 319
 320         (Deprecated; use randrange(a, b+1).)
 321         """
 322
 323         return self.randrange(a, b+1)
 324
 325 ## -------------------- sequence methods  -------------------
 326
 327     def choice(self, seq):
 328         """Choose a random element from a non-empty sequence."""
 329         return seq[int(self.random() * len(seq))]
 330
 331     def shuffle(self, x, random=None, int=int):
 332         """x, random=random.random -> shuffle list x in place; return None.
 333
 334         Optional arg random is a 0-argument function returning a random
 335         float in [0.0, 1.0); by default, the standard random.random.
 336
 337         Note that for even rather small len(x), the total number of
 338         permutations of x is larger than the period of most random number
 339         generators; this implies that "most" permutations of a long
 340         sequence can never be generated.
 341         """
 342
 343         if random is None:
 344             random = self.random
 345         for i in xrange(len(x)-1, 0, -1):
 346             # pick an element in x[:i+1] with which to exchange x[i]
 347             j = int(random() * (i+1))
 348             x[i], x[j] = x[j], x[i]
 349
 350 ## -------------------- real-valued distributions  -------------------
 351
 352 ## -------------------- uniform distribution -------------------
 353
 354     def uniform(self, a, b):
 355         """Get a random number in the range [a, b)."""
 356         return a + (b-a) * self.random()
 357
 358 ## -------------------- normal distribution --------------------
 359
 360     def normalvariate(self, mu, sigma):
 361         # mu = mean, sigma = standard deviation
 362
 363         # Uses Kinderman and Monahan method. Reference: Kinderman,
 364         # A.J. and Monahan, J.F., "Computer generation of random
 365         # variables using the ratio of uniform deviates", ACM Trans
 366         # Math Software, 3, (1977), pp257-260.
 367
 368         random = self.random
 369         while 1:
 370             u1 = random()
 371             u2 = random()
 372             z = NV_MAGICCONST*(u1-0.5)/u2
 373             zz = z*z/4.0
 374             if zz <= -_log(u2):
 375                 break
 376         return mu + z*sigma
 377
 378 ## -------------------- lognormal distribution --------------------
 379
 380     def lognormvariate(self, mu, sigma):
 381         return _exp(self.normalvariate(mu, sigma))
 382
 383 ## -------------------- circular uniform --------------------
 384
 385     def cunifvariate(self, mean, arc):
 386         # mean: mean angle (in radians between 0 and pi)
 387         # arc:  range of distribution (in radians between 0 and pi)
 388
 389         return (mean + arc * (self.random() - 0.5)) % _pi
 390
 391 ## -------------------- exponential distribution --------------------
 392
 393     def expovariate(self, lambd):
 394         # lambd: rate lambd = 1/mean
 395         # ('lambda' is a Python reserved word)
 396
 397         random = self.random
 398         u = random()
 399         while u <= 1e-7:
 400             u = random()
 401         return -_log(u)/lambd
 402
 403 ## -------------------- von Mises distribution --------------------
 404
 405     def vonmisesvariate(self, mu, kappa):
 406         # mu:    mean angle (in radians between 0 and 2*pi)
 407         # kappa: concentration parameter kappa (>= 0)
 408         # if kappa = 0 generate uniform random angle
 409
 410         # Based upon an algorithm published in: Fisher, N.I.,
 411         # "Statistical Analysis of Circular Data", Cambridge
 412         # University Press, 1993.
 413
 414         # Thanks to Magnus Kessler for a correction to the
 415         # implementation of step 4.
 416
 417         random = self.random
 418         if kappa <= 1e-6:
 419             return TWOPI * random()
 420
 421         a = 1.0 + _sqrt(1.0 + 4.0 * kappa * kappa)
 422         b = (a - _sqrt(2.0 * a))/(2.0 * kappa)
 423         r = (1.0 + b * b)/(2.0 * b)
 424
 425         while 1:
 426             u1 = random()
 427
 428             z = _cos(_pi * u1)
 429             f = (1.0 + r * z)/(r + z)
 430             c = kappa * (r - f)
 431
 432             u2 = random()
 433
 434             if not (u2 >= c * (2.0 - c) and u2 > c * _exp(1.0 - c)):
 435                 break
 436
 437         u3 = random()
 438         if u3 > 0.5:
 439             theta = (mu % TWOPI) + _acos(f)
 440         else:
 441             theta = (mu % TWOPI) - _acos(f)
 442
 443         return theta
 444
 445 ## -------------------- gamma distribution --------------------
 446
 447     def gammavariate(self, alpha, beta):
 448         # beta times standard gamma
 449         ainv = _sqrt(2.0 * alpha - 1.0)
 450         return beta * self.stdgamma(alpha, ainv, alpha - LOG4, alpha + ainv)
 451
 452     def stdgamma(self, alpha, ainv, bbb, ccc):
 453         # ainv = sqrt(2 * alpha - 1)
 454         # bbb = alpha - log(4)
 455         # ccc = alpha + ainv
 456
 457         random = self.random
 458         if alpha <= 0.0:
 459             raise ValueError, 'stdgamma: alpha must be > 0.0'
 460
 461         if alpha > 1.0:
 462
 463             # Uses R.C.H. Cheng, "The generation of Gamma
 464             # variables with non-integral shape parameters",
 465             # Applied Statistics, (1977), 26, No. 1, p71-74
 466
 467             while 1:
 468                 u1 = random()
 469                 u2 = random()
 470                 v = _log(u1/(1.0-u1))/ainv
 471                 x = alpha*_exp(v)
 472                 z = u1*u1*u2
 473                 r = bbb+ccc*v-x
 474                 if r + SG_MAGICCONST - 4.5*z >= 0.0 or r >= _log(z):
 475                     return x
 476
 477         elif alpha == 1.0:
 478             # expovariate(1)
 479             u = random()
 480             while u <= 1e-7:
 481                 u = random()
 482             return -_log(u)
 483
 484         else:   # alpha is between 0 and 1 (exclusive)
 485
 486             # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle
 487
 488             while 1:
 489                 u = random()
 490                 b = (_e + alpha)/_e
 491                 p = b*u
 492                 if p <= 1.0:
 493                     x = pow(p, 1.0/alpha)
 494                 else:
 495                     # p > 1
 496                     x = -_log((b-p)/alpha)
 497                 u1 = random()
 498                 if not (((p <= 1.0) and (u1 > _exp(-x))) or
 499                           ((p > 1)  and  (u1 > pow(x, alpha - 1.0)))):
 500                     break
 501             return x
 502
 503
 504 ## -------------------- Gauss (faster alternative) --------------------
 505
 506     def gauss(self, mu, sigma):
 507
 508         # When x and y are two variables from [0, 1), uniformly
 509         # distributed, then
 510         #
 511         #    cos(2*pi*x)*sqrt(-2*log(1-y))
 512         #    sin(2*pi*x)*sqrt(-2*log(1-y))
 513         #
 514         # are two *independent* variables with normal distribution
 515         # (mu = 0, sigma = 1).
 516         # (Lambert Meertens)
 517         # (corrected version; bug discovered by Mike Miller, fixed by LM)
 518
 519         # Multithreading note: When two threads call this function
 520         # simultaneously, it is possible that they will receive the
 521         # same return value.  The window is very small though.  To
 522         # avoid this, you have to use a lock around all calls.  (I
 523         # didn't want to slow this down in the serial case by using a
 524         # lock here.)
 525
 526         random = self.random
 527         z = self.gauss_next
 528         self.gauss_next = None
 529         if z is None:
 530             x2pi = random() * TWOPI
 531             g2rad = _sqrt(-2.0 * _log(1.0 - random()))
 532             z = _cos(x2pi) * g2rad
 533             self.gauss_next = _sin(x2pi) * g2rad
 534
 535         return mu + z*sigma
 536
 537 ## -------------------- beta --------------------
 538 ## See
 539 ## http://sourceforge.net/bugs/?func=detailbug&bug_id=130030&group_id=5470
 540 ## for Ivan Frohne's insightful analysis of why the original implementation:
 541 ##
 542 ##    def betavariate(self, alpha, beta):
 543 ##        # Discrete Event Simulation in C, pp 87-88.
 544 ##
 545 ##        y = self.expovariate(alpha)
 546 ##        z = self.expovariate(1.0/beta)
 547 ##        return z/(y+z)
 548 ##
 549 ## was dead wrong, and how it probably got that way.
 550
 551     def betavariate(self, alpha, beta):
 552         # This version due to Janne Sinkkonen, and matches all the std
 553         # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution").
 554         y = self.gammavariate(alpha, 1.)
 555         if y == 0:
 556             return 0.0
 557         else:
 558             return y / (y + self.gammavariate(beta, 1.))
 559
 560 ## -------------------- Pareto --------------------
 561
 562     def paretovariate(self, alpha):
 563         # Jain, pg. 495
 564
 565         u = self.random()
 566         return 1.0 / pow(u, 1.0/alpha)
 567
 568 ## -------------------- Weibull --------------------
 569
 570     def weibullvariate(self, alpha, beta):
 571         # Jain, pg. 499; bug fix courtesy Bill Arms
 572
 573         u = self.random()
 574         return alpha * pow(-_log(u), 1.0/beta)
 575
 576 ## -------------------- test program --------------------
 577
 578 def _test_generator(n, funccall):
 579     import time
 580     print n, 'times', funccall
 581     code = compile(funccall, funccall, 'eval')
 582     sum = 0.0
 583     sqsum = 0.0
 584     smallest = 1e10
 585     largest = -1e10
 586     t0 = time.time()
 587     for i in range(n):
 588         x = eval(code)
 589         sum = sum + x
 590         sqsum = sqsum + x*x
 591         smallest = min(x, smallest)
 592         largest = max(x, largest)
 593     t1 = time.time()
 594     print round(t1-t0, 3), 'sec,',
 595     avg = sum/n
 596     stddev = _sqrt(sqsum/n - avg*avg)
 597     print 'avg %g, stddev %g, min %g, max %g' % \
 598               (avg, stddev, smallest, largest)
 599
 600 def _test(N=200):
 601     print 'TWOPI         =', TWOPI
 602     print 'LOG4          =', LOG4
 603     print 'NV_MAGICCONST =', NV_MAGICCONST
 604     print 'SG_MAGICCONST =', SG_MAGICCONST
 605     _test_generator(N, 'random()')
 606     _test_generator(N, 'normalvariate(0.0, 1.0)')
 607     _test_generator(N, 'lognormvariate(0.0, 1.0)')
 608     _test_generator(N, 'cunifvariate(0.0, 1.0)')
 609     _test_generator(N, 'expovariate(1.0)')
 610     _test_generator(N, 'vonmisesvariate(0.0, 1.0)')
 611     _test_generator(N, 'gammavariate(0.5, 1.0)')
 612     _test_generator(N, 'gammavariate(0.9, 1.0)')
 613     _test_generator(N, 'gammavariate(1.0, 1.0)')
 614     _test_generator(N, 'gammavariate(2.0, 1.0)')
 615     _test_generator(N, 'gammavariate(20.0, 1.0)')
 616     _test_generator(N, 'gammavariate(200.0, 1.0)')
 617     _test_generator(N, 'gauss(0.0, 1.0)')
 618     _test_generator(N, 'betavariate(3.0, 3.0)')
 619     _test_generator(N, 'paretovariate(1.0)')
 620     _test_generator(N, 'weibullvariate(1.0, 1.0)')
 621
 622     # Test jumpahead.
 623     s = getstate()
 624     jumpahead(N)
 625     r1 = random()
 626     # now do it the slow way
 627     setstate(s)
 628     for i in range(N):
 629         random()
 630     r2 = random()
 631     if r1 != r2:
 632         raise ValueError("jumpahead test failed " + `(N, r1, r2)`)
 633
 634 # Create one instance, seeded from current time, and export its methods
 635 # as module-level functions.  The functions are not threadsafe, and state
 636 # is shared across all uses (both in the user's code and in the Python
 637 # libraries), but that's fine for most programs and is easier for the
 638 # casual user than making them instantiate their own Random() instance.
 639 _inst = Random()
 640 seed = _inst.seed
 641 random = _inst.random
 642 uniform = _inst.uniform
 643 randint = _inst.randint
 644 choice = _inst.choice
 645 randrange = _inst.randrange
 646 shuffle = _inst.shuffle
 647 normalvariate = _inst.normalvariate
 648 lognormvariate = _inst.lognormvariate
 649 cunifvariate = _inst.cunifvariate
 650 expovariate = _inst.expovariate
 651 vonmisesvariate = _inst.vonmisesvariate
 652 gammavariate = _inst.gammavariate
 653 stdgamma = _inst.stdgamma
 654 gauss = _inst.gauss
 655 betavariate = _inst.betavariate
 656 paretovariate = _inst.paretovariate
 657 weibullvariate = _inst.weibullvariate
 658 getstate = _inst.getstate
 659 setstate = _inst.setstate
 660 jumpahead = _inst.jumpahead
 661 whseed = _inst.whseed
 662
 663 if __name__ == '__main__':
 664     _test()