Fix the tag.
[python/dscho.git] / Lib / random.py
blobe0c015d8e8c74c10a42fdea0cad8a40ef39bb7f0
1 """Random variable generators.
3 integers
4 --------
5 uniform within range
7 sequences
8 ---------
9 pick random element
10 pick random sample
11 generate random permutation
13 distributions on the real line:
14 ------------------------------
15 uniform
16 triangular
17 normal (Gaussian)
18 lognormal
19 negative exponential
20 gamma
21 beta
22 pareto
23 Weibull
25 distributions on the circle (angles 0 to 2pi)
26 ---------------------------------------------
27 circular uniform
28 von Mises
30 General notes on the underlying Mersenne Twister core generator:
32 * The period is 2**19937-1.
33 * It is one of the most extensively tested generators in existence.
34 * The random() method is implemented in C, executes in a single Python step,
35 and is, therefore, threadsafe.
37 """
39 from __future__ import division
40 from warnings import warn as _warn
41 from types import MethodType as _MethodType, BuiltinMethodType as _BuiltinMethodType
42 from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil
43 from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
44 from os import urandom as _urandom
45 from binascii import hexlify as _hexlify
47 __all__ = ["Random","seed","random","uniform","randint","choice","sample",
48 "randrange","shuffle","normalvariate","lognormvariate",
49 "expovariate","vonmisesvariate","gammavariate","triangular",
50 "gauss","betavariate","paretovariate","weibullvariate",
51 "getstate","setstate", "getrandbits",
52 "SystemRandom"]
54 NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0)
55 TWOPI = 2.0*_pi
56 LOG4 = _log(4.0)
57 SG_MAGICCONST = 1.0 + _log(4.5)
58 BPF = 53 # Number of bits in a float
59 RECIP_BPF = 2**-BPF
62 # Translated by Guido van Rossum from C source provided by
63 # Adrian Baddeley. Adapted by Raymond Hettinger for use with
64 # the Mersenne Twister and os.urandom() core generators.
66 import _random
68 class Random(_random.Random):
69 """Random number generator base class used by bound module functions.
71 Used to instantiate instances of Random to get generators that don't
72 share state.
74 Class Random can also be subclassed if you want to use a different basic
75 generator of your own devising: in that case, override the following
76 methods: random(), seed(), getstate(), and setstate().
77 Optionally, implement a getrandombits() method so that randrange()
78 can cover arbitrarily large ranges.
80 """
82 VERSION = 3 # used by getstate/setstate
84 def __init__(self, x=None):
85 """Initialize an instance.
87 Optional argument x controls seeding, as for Random.seed().
88 """
90 self.seed(x)
91 self.gauss_next = None
93 def seed(self, a=None):
94 """Initialize internal state from hashable object.
96 None or no argument seeds from current time or from an operating
97 system specific randomness source if available.
99 If a is not None or an int or long, hash(a) is used instead.
102 if a is None:
103 try:
104 a = int(_hexlify(_urandom(16)), 16)
105 except NotImplementedError:
106 import time
107 a = int(time.time() * 256) # use fractional seconds
109 super().seed(a)
110 self.gauss_next = None
112 def getstate(self):
113 """Return internal state; can be passed to setstate() later."""
114 return self.VERSION, super().getstate(), self.gauss_next
116 def setstate(self, state):
117 """Restore internal state from object returned by getstate()."""
118 version = state[0]
119 if version == 3:
120 version, internalstate, self.gauss_next = state
121 super().setstate(internalstate)
122 elif version == 2:
123 version, internalstate, self.gauss_next = state
124 # In version 2, the state was saved as signed ints, which causes
125 # inconsistencies between 32/64-bit systems. The state is
126 # really unsigned 32-bit ints, so we convert negative ints from
127 # version 2 to positive longs for version 3.
128 try:
129 internalstate = tuple( x % (2**32) for x in internalstate )
130 except ValueError as e:
131 raise TypeError from e
132 super(Random, self).setstate(internalstate)
133 else:
134 raise ValueError("state with version %s passed to "
135 "Random.setstate() of version %s" %
136 (version, self.VERSION))
138 ## ---- Methods below this point do not need to be overridden when
139 ## ---- subclassing for the purpose of using a different core generator.
141 ## -------------------- pickle support -------------------
143 def __getstate__(self): # for pickle
144 return self.getstate()
146 def __setstate__(self, state): # for pickle
147 self.setstate(state)
149 def __reduce__(self):
150 return self.__class__, (), self.getstate()
152 ## -------------------- integer methods -------------------
154 def randrange(self, start, stop=None, step=1, int=int, default=None,
155 maxwidth=1<<BPF):
156 """Choose a random item from range(start, stop[, step]).
158 This fixes the problem with randint() which includes the
159 endpoint; in Python this is usually not what you want.
160 Do not supply the 'int', 'default', and 'maxwidth' arguments.
163 # This code is a bit messy to make it fast for the
164 # common case while still doing adequate error checking.
165 istart = int(start)
166 if istart != start:
167 raise ValueError("non-integer arg 1 for randrange()")
168 if stop is default:
169 if istart > 0:
170 if istart >= maxwidth:
171 return self._randbelow(istart)
172 return int(self.random() * istart)
173 raise ValueError("empty range for randrange()")
175 # stop argument supplied.
176 istop = int(stop)
177 if istop != stop:
178 raise ValueError("non-integer stop for randrange()")
179 width = istop - istart
180 if step == 1 and width > 0:
181 # Note that
182 # int(istart + self.random()*width)
183 # instead would be incorrect. For example, consider istart
184 # = -2 and istop = 0. Then the guts would be in
185 # -2.0 to 0.0 exclusive on both ends (ignoring that random()
186 # might return 0.0), and because int() truncates toward 0, the
187 # final result would be -1 or 0 (instead of -2 or -1).
188 # istart + int(self.random()*width)
189 # would also be incorrect, for a subtler reason: the RHS
190 # can return a long, and then randrange() would also return
191 # a long, but we're supposed to return an int (for backward
192 # compatibility).
194 if width >= maxwidth:
195 return int(istart + self._randbelow(width))
196 return int(istart + int(self.random()*width))
197 if step == 1:
198 raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width))
200 # Non-unit step argument supplied.
201 istep = int(step)
202 if istep != step:
203 raise ValueError("non-integer step for randrange()")
204 if istep > 0:
205 n = (width + istep - 1) // istep
206 elif istep < 0:
207 n = (width + istep + 1) // istep
208 else:
209 raise ValueError("zero step for randrange()")
211 if n <= 0:
212 raise ValueError("empty range for randrange()")
214 if n >= maxwidth:
215 return istart + istep*self._randbelow(n)
216 return istart + istep*int(self.random() * n)
218 def randint(self, a, b):
219 """Return random integer in range [a, b], including both end points.
222 return self.randrange(a, b+1)
224 def _randbelow(self, n, _log=_log, int=int, _maxwidth=1<<BPF,
225 _Method=_MethodType, _BuiltinMethod=_BuiltinMethodType):
226 """Return a random int in the range [0,n)
228 Handles the case where n has more bits than returned
229 by a single call to the underlying generator.
232 try:
233 getrandbits = self.getrandbits
234 except AttributeError:
235 pass
236 else:
237 # Only call self.getrandbits if the original random() builtin method
238 # has not been overridden or if a new getrandbits() was supplied.
239 # This assures that the two methods correspond.
240 if type(self.random) is _BuiltinMethod or type(getrandbits) is _Method:
241 k = int(1.00001 + _log(n-1, 2.0)) # 2**k > n-1 > 2**(k-2)
242 r = getrandbits(k)
243 while r >= n:
244 r = getrandbits(k)
245 return r
246 if n >= _maxwidth:
247 _warn("Underlying random() generator does not supply \n"
248 "enough bits to choose from a population range this large")
249 return int(self.random() * n)
251 ## -------------------- sequence methods -------------------
253 def choice(self, seq):
254 """Choose a random element from a non-empty sequence."""
255 return seq[int(self.random() * len(seq))] # raises IndexError if seq is empty
257 def shuffle(self, x, random=None, int=int):
258 """x, random=random.random -> shuffle list x in place; return None.
260 Optional arg random is a 0-argument function returning a random
261 float in [0.0, 1.0); by default, the standard random.random.
264 if random is None:
265 random = self.random
266 for i in reversed(range(1, len(x))):
267 # pick an element in x[:i+1] with which to exchange x[i]
268 j = int(random() * (i+1))
269 x[i], x[j] = x[j], x[i]
271 def sample(self, population, k):
272 """Chooses k unique random elements from a population sequence or set.
274 Returns a new list containing elements from the population while
275 leaving the original population unchanged. The resulting list is
276 in selection order so that all sub-slices will also be valid random
277 samples. This allows raffle winners (the sample) to be partitioned
278 into grand prize and second place winners (the subslices).
280 Members of the population need not be hashable or unique. If the
281 population contains repeats, then each occurrence is a possible
282 selection in the sample.
284 To choose a sample in a range of integers, use range as an argument.
285 This is especially fast and space efficient for sampling from a
286 large population: sample(range(10000000), 60)
289 # Sampling without replacement entails tracking either potential
290 # selections (the pool) in a list or previous selections in a set.
292 # When the number of selections is small compared to the
293 # population, then tracking selections is efficient, requiring
294 # only a small set and an occasional reselection. For
295 # a larger number of selections, the pool tracking method is
296 # preferred since the list takes less space than the
297 # set and it doesn't suffer from frequent reselections.
299 if isinstance(population, (set, frozenset)):
300 population = tuple(population)
301 if not hasattr(population, '__getitem__') or hasattr(population, 'keys'):
302 raise TypeError("Population must be a sequence or set. For dicts, use dict.keys().")
303 random = self.random
304 n = len(population)
305 if not 0 <= k <= n:
306 raise ValueError("Sample larger than population")
307 _int = int
308 result = [None] * k
309 setsize = 21 # size of a small set minus size of an empty list
310 if k > 5:
311 setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets
312 if n <= setsize:
313 # An n-length list is smaller than a k-length set
314 pool = list(population)
315 for i in range(k): # invariant: non-selected at [0,n-i)
316 j = _int(random() * (n-i))
317 result[i] = pool[j]
318 pool[j] = pool[n-i-1] # move non-selected item into vacancy
319 else:
320 selected = set()
321 selected_add = selected.add
322 for i in range(k):
323 j = _int(random() * n)
324 while j in selected:
325 j = _int(random() * n)
326 selected_add(j)
327 result[i] = population[j]
328 return result
330 ## -------------------- real-valued distributions -------------------
332 ## -------------------- uniform distribution -------------------
334 def uniform(self, a, b):
335 """Get a random number in the range [a, b)."""
336 return a + (b-a) * self.random()
338 ## -------------------- triangular --------------------
340 def triangular(self, low=0.0, high=1.0, mode=None):
341 """Triangular distribution.
343 Continuous distribution bounded by given lower and upper limits,
344 and having a given mode value in-between.
346 http://en.wikipedia.org/wiki/Triangular_distribution
349 u = self.random()
350 c = 0.5 if mode is None else (mode - low) / (high - low)
351 if u > c:
352 u = 1.0 - u
353 c = 1.0 - c
354 low, high = high, low
355 return low + (high - low) * (u * c) ** 0.5
357 ## -------------------- normal distribution --------------------
359 def normalvariate(self, mu, sigma):
360 """Normal distribution.
362 mu is the mean, and sigma is the standard deviation.
365 # mu = mean, sigma = standard deviation
367 # Uses Kinderman and Monahan method. Reference: Kinderman,
368 # A.J. and Monahan, J.F., "Computer generation of random
369 # variables using the ratio of uniform deviates", ACM Trans
370 # Math Software, 3, (1977), pp257-260.
372 random = self.random
373 while 1:
374 u1 = random()
375 u2 = 1.0 - random()
376 z = NV_MAGICCONST*(u1-0.5)/u2
377 zz = z*z/4.0
378 if zz <= -_log(u2):
379 break
380 return mu + z*sigma
382 ## -------------------- lognormal distribution --------------------
384 def lognormvariate(self, mu, sigma):
385 """Log normal distribution.
387 If you take the natural logarithm of this distribution, you'll get a
388 normal distribution with mean mu and standard deviation sigma.
389 mu can have any value, and sigma must be greater than zero.
392 return _exp(self.normalvariate(mu, sigma))
394 ## -------------------- exponential distribution --------------------
396 def expovariate(self, lambd):
397 """Exponential distribution.
399 lambd is 1.0 divided by the desired mean. (The parameter would be
400 called "lambda", but that is a reserved word in Python.) Returned
401 values range from 0 to positive infinity.
404 # lambd: rate lambd = 1/mean
405 # ('lambda' is a Python reserved word)
407 random = self.random
408 u = random()
409 while u <= 1e-7:
410 u = random()
411 return -_log(u)/lambd
413 ## -------------------- von Mises distribution --------------------
415 def vonmisesvariate(self, mu, kappa):
416 """Circular data distribution.
418 mu is the mean angle, expressed in radians between 0 and 2*pi, and
419 kappa is the concentration parameter, which must be greater than or
420 equal to zero. If kappa is equal to zero, this distribution reduces
421 to a uniform random angle over the range 0 to 2*pi.
424 # mu: mean angle (in radians between 0 and 2*pi)
425 # kappa: concentration parameter kappa (>= 0)
426 # if kappa = 0 generate uniform random angle
428 # Based upon an algorithm published in: Fisher, N.I.,
429 # "Statistical Analysis of Circular Data", Cambridge
430 # University Press, 1993.
432 # Thanks to Magnus Kessler for a correction to the
433 # implementation of step 4.
435 random = self.random
436 if kappa <= 1e-6:
437 return TWOPI * random()
439 a = 1.0 + _sqrt(1.0 + 4.0 * kappa * kappa)
440 b = (a - _sqrt(2.0 * a))/(2.0 * kappa)
441 r = (1.0 + b * b)/(2.0 * b)
443 while 1:
444 u1 = random()
446 z = _cos(_pi * u1)
447 f = (1.0 + r * z)/(r + z)
448 c = kappa * (r - f)
450 u2 = random()
452 if u2 < c * (2.0 - c) or u2 <= c * _exp(1.0 - c):
453 break
455 u3 = random()
456 if u3 > 0.5:
457 theta = (mu % TWOPI) + _acos(f)
458 else:
459 theta = (mu % TWOPI) - _acos(f)
461 return theta
463 ## -------------------- gamma distribution --------------------
465 def gammavariate(self, alpha, beta):
466 """Gamma distribution. Not the gamma function!
468 Conditions on the parameters are alpha > 0 and beta > 0.
472 # alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2
474 # Warning: a few older sources define the gamma distribution in terms
475 # of alpha > -1.0
476 if alpha <= 0.0 or beta <= 0.0:
477 raise ValueError('gammavariate: alpha and beta must be > 0.0')
479 random = self.random
480 if alpha > 1.0:
482 # Uses R.C.H. Cheng, "The generation of Gamma
483 # variables with non-integral shape parameters",
484 # Applied Statistics, (1977), 26, No. 1, p71-74
486 ainv = _sqrt(2.0 * alpha - 1.0)
487 bbb = alpha - LOG4
488 ccc = alpha + ainv
490 while 1:
491 u1 = random()
492 if not 1e-7 < u1 < .9999999:
493 continue
494 u2 = 1.0 - random()
495 v = _log(u1/(1.0-u1))/ainv
496 x = alpha*_exp(v)
497 z = u1*u1*u2
498 r = bbb+ccc*v-x
499 if r + SG_MAGICCONST - 4.5*z >= 0.0 or r >= _log(z):
500 return x * beta
502 elif alpha == 1.0:
503 # expovariate(1)
504 u = random()
505 while u <= 1e-7:
506 u = random()
507 return -_log(u) * beta
509 else: # alpha is between 0 and 1 (exclusive)
511 # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle
513 while 1:
514 u = random()
515 b = (_e + alpha)/_e
516 p = b*u
517 if p <= 1.0:
518 x = p ** (1.0/alpha)
519 else:
520 x = -_log((b-p)/alpha)
521 u1 = random()
522 if p > 1.0:
523 if u1 <= x ** (alpha - 1.0):
524 break
525 elif u1 <= _exp(-x):
526 break
527 return x * beta
529 ## -------------------- Gauss (faster alternative) --------------------
531 def gauss(self, mu, sigma):
532 """Gaussian distribution.
534 mu is the mean, and sigma is the standard deviation. This is
535 slightly faster than the normalvariate() function.
537 Not thread-safe without a lock around calls.
541 # When x and y are two variables from [0, 1), uniformly
542 # distributed, then
544 # cos(2*pi*x)*sqrt(-2*log(1-y))
545 # sin(2*pi*x)*sqrt(-2*log(1-y))
547 # are two *independent* variables with normal distribution
548 # (mu = 0, sigma = 1).
549 # (Lambert Meertens)
550 # (corrected version; bug discovered by Mike Miller, fixed by LM)
552 # Multithreading note: When two threads call this function
553 # simultaneously, it is possible that they will receive the
554 # same return value. The window is very small though. To
555 # avoid this, you have to use a lock around all calls. (I
556 # didn't want to slow this down in the serial case by using a
557 # lock here.)
559 random = self.random
560 z = self.gauss_next
561 self.gauss_next = None
562 if z is None:
563 x2pi = random() * TWOPI
564 g2rad = _sqrt(-2.0 * _log(1.0 - random()))
565 z = _cos(x2pi) * g2rad
566 self.gauss_next = _sin(x2pi) * g2rad
568 return mu + z*sigma
570 ## -------------------- beta --------------------
571 ## See
572 ## http://sourceforge.net/bugs/?func=detailbug&bug_id=130030&group_id=5470
573 ## for Ivan Frohne's insightful analysis of why the original implementation:
575 ## def betavariate(self, alpha, beta):
576 ## # Discrete Event Simulation in C, pp 87-88.
578 ## y = self.expovariate(alpha)
579 ## z = self.expovariate(1.0/beta)
580 ## return z/(y+z)
582 ## was dead wrong, and how it probably got that way.
584 def betavariate(self, alpha, beta):
585 """Beta distribution.
587 Conditions on the parameters are alpha > 0 and beta > 0.
588 Returned values range between 0 and 1.
592 # This version due to Janne Sinkkonen, and matches all the std
593 # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution").
594 y = self.gammavariate(alpha, 1.)
595 if y == 0:
596 return 0.0
597 else:
598 return y / (y + self.gammavariate(beta, 1.))
600 ## -------------------- Pareto --------------------
602 def paretovariate(self, alpha):
603 """Pareto distribution. alpha is the shape parameter."""
604 # Jain, pg. 495
606 u = 1.0 - self.random()
607 return 1.0 / pow(u, 1.0/alpha)
609 ## -------------------- Weibull --------------------
611 def weibullvariate(self, alpha, beta):
612 """Weibull distribution.
614 alpha is the scale parameter and beta is the shape parameter.
617 # Jain, pg. 499; bug fix courtesy Bill Arms
619 u = 1.0 - self.random()
620 return alpha * pow(-_log(u), 1.0/beta)
622 ## --------------- Operating System Random Source ------------------
624 class SystemRandom(Random):
625 """Alternate random number generator using sources provided
626 by the operating system (such as /dev/urandom on Unix or
627 CryptGenRandom on Windows).
629 Not available on all systems (see os.urandom() for details).
632 def random(self):
633 """Get the next random number in the range [0.0, 1.0)."""
634 return (int(_hexlify(_urandom(7)), 16) >> 3) * RECIP_BPF
636 def getrandbits(self, k):
637 """getrandbits(k) -> x. Generates a long int with k random bits."""
638 if k <= 0:
639 raise ValueError('number of bits must be greater than zero')
640 if k != int(k):
641 raise TypeError('number of bits should be an integer')
642 bytes = (k + 7) // 8 # bits / 8 and rounded up
643 x = int(_hexlify(_urandom(bytes)), 16)
644 return x >> (bytes * 8 - k) # trim excess bits
646 def seed(self, *args, **kwds):
647 "Stub method. Not used for a system random number generator."
648 return None
650 def _notimplemented(self, *args, **kwds):
651 "Method should not be called for a system random number generator."
652 raise NotImplementedError('System entropy source does not have state.')
653 getstate = setstate = _notimplemented
655 ## -------------------- test program --------------------
657 def _test_generator(n, func, args):
658 import time
659 print(n, 'times', func.__name__)
660 total = 0.0
661 sqsum = 0.0
662 smallest = 1e10
663 largest = -1e10
664 t0 = time.time()
665 for i in range(n):
666 x = func(*args)
667 total += x
668 sqsum = sqsum + x*x
669 smallest = min(x, smallest)
670 largest = max(x, largest)
671 t1 = time.time()
672 print(round(t1-t0, 3), 'sec,', end=' ')
673 avg = total/n
674 stddev = _sqrt(sqsum/n - avg*avg)
675 print('avg %g, stddev %g, min %g, max %g' % \
676 (avg, stddev, smallest, largest))
679 def _test(N=2000):
680 _test_generator(N, random, ())
681 _test_generator(N, normalvariate, (0.0, 1.0))
682 _test_generator(N, lognormvariate, (0.0, 1.0))
683 _test_generator(N, vonmisesvariate, (0.0, 1.0))
684 _test_generator(N, gammavariate, (0.01, 1.0))
685 _test_generator(N, gammavariate, (0.1, 1.0))
686 _test_generator(N, gammavariate, (0.1, 2.0))
687 _test_generator(N, gammavariate, (0.5, 1.0))
688 _test_generator(N, gammavariate, (0.9, 1.0))
689 _test_generator(N, gammavariate, (1.0, 1.0))
690 _test_generator(N, gammavariate, (2.0, 1.0))
691 _test_generator(N, gammavariate, (20.0, 1.0))
692 _test_generator(N, gammavariate, (200.0, 1.0))
693 _test_generator(N, gauss, (0.0, 1.0))
694 _test_generator(N, betavariate, (3.0, 3.0))
695 _test_generator(N, triangular, (0.0, 1.0, 1.0/3.0))
697 # Create one instance, seeded from current time, and export its methods
698 # as module-level functions. The functions share state across all uses
699 #(both in the user's code and in the Python libraries), but that's fine
700 # for most programs and is easier for the casual user than making them
701 # instantiate their own Random() instance.
703 _inst = Random()
704 seed = _inst.seed
705 random = _inst.random
706 uniform = _inst.uniform
707 triangular = _inst.triangular
708 randint = _inst.randint
709 choice = _inst.choice
710 randrange = _inst.randrange
711 sample = _inst.sample
712 shuffle = _inst.shuffle
713 normalvariate = _inst.normalvariate
714 lognormvariate = _inst.lognormvariate
715 expovariate = _inst.expovariate
716 vonmisesvariate = _inst.vonmisesvariate
717 gammavariate = _inst.gammavariate
718 gauss = _inst.gauss
719 betavariate = _inst.betavariate
720 paretovariate = _inst.paretovariate
721 weibullvariate = _inst.weibullvariate
722 getstate = _inst.getstate
723 setstate = _inst.setstate
724 getrandbits = _inst.getrandbits
726 if __name__ == '__main__':
727 _test()