Lib/test/test_mutants.py

   1 from test_support import verbose
   2 import random
   3
   4 # From SF bug #422121:  Insecurities in dict comparison.
   5
   6 # Safety of code doing comparisons has been an historical Python weak spot.
   7 # The problem is that comparison of structures written in C *naturally*
   8 # wants to hold on to things like the size of the container, or "the
   9 # biggest" containee so far, across a traversal of the container; but
  10 # code to do containee comparisons can call back into Python and mutate
  11 # the container in arbitrary ways while the C loop is in midstream.  If the
  12 # C code isn't extremely paranoid about digging things out of memory on
  13 # each trip, and artificially boosting refcounts for the duration, anything
  14 # from infinite loops to OS crashes can result (yes, I use Windows <wink>).
  15 #
  16 # The other problem is that code designed to provoke a weakness is usually
  17 # white-box code, and so catches only the particular vulnerabilities the
  18 # author knew to protect against.  For example, Python's list.sort() code
  19 # went thru many iterations as one "new" vulnerability after another was
  20 # discovered.
  21 #
  22 # So the dict comparison test here uses a black-box approach instead,
  23 # generating dicts of various sizes at random, and performing random
  24 # mutations on them at random times.  This proved very effective,
  25 # triggering at least six distinct failure modes the first 20 times I
  26 # ran it.  Indeed, at the start, the driver never got beyond 6 iterations
  27 # before the test died.
  28
  29 # The dicts are global to make it easy to mutate tham from within functions.
  30 dict1 = {}
  31 dict2 = {}
  32
  33 # The current set of keys in dict1 and dict2.  These are materialized as
  34 # lists to make it easy to pick a dict key at random.
  35 dict1keys = []
  36 dict2keys = []
  37
  38 # Global flag telling maybe_mutate() wether to *consider* mutating.
  39 mutate = 0
  40
  41 # If global mutate is true, consider mutating a dict.  May or may not
  42 # mutate a dict even if mutate is true.  If it does decide to mutate a
  43 # dict, it picks one of {dict1, dict2} at random, and deletes a random
  44 # entry from it; or, more rarely, adds a random element.
  45
  46 def maybe_mutate():
  47     global mutate
  48     if not mutate:
  49         return
  50     if random.random() < 0.5:
  51         return
  52
  53     if random.random() < 0.5:
  54         target, keys = dict1, dict1keys
  55     else:
  56         target, keys = dict2, dict2keys
  57
  58     if random.random() < 0.2:
  59         # Insert a new key.
  60         mutate = 0   # disable mutation until key inserted
  61         while 1:
  62             newkey = Horrid(random.randrange(100))
  63             if not target.has_key(newkey):
  64                 break
  65         target[newkey] = Horrid(random.randrange(100))
  66         keys.append(newkey)
  67         mutate = 1
  68
  69     elif keys:
  70         # Delete a key at random.
  71         i = random.randrange(len(keys))
  72         key = keys[i]
  73         del target[key]
  74         # CAUTION:  don't use keys.remove(key) here.  Or do <wink>.  The
  75         # point is that .remove() would trigger more comparisons, and so
  76         # also more calls to this routine.  We're mutating often enough
  77         # without that.
  78         del keys[i]
  79
  80 # A horrid class that triggers random mutations of dict1 and dict2 when
  81 # instances are compared.
  82
  83 class Horrid:
  84     def __init__(self, i):
  85         # Comparison outcomes are determined by the value of i.
  86         self.i = i
  87
  88         # An artificial hashcode is selected at random so that we don't
  89         # have any systematic relationship between comparison outcomes
  90         # (based on self.i and other.i) and relative position within the
  91         # hash vector (based on hashcode).
  92         self.hashcode = random.randrange(1000000000)
  93
  94     def __hash__(self):
  95         return self.hashcode
  96
  97     def __cmp__(self, other):
  98         maybe_mutate()   # The point of the test.
  99         return cmp(self.i, other.i)
 100
 101     def __repr__(self):
 102         return "Horrid(%d)" % self.i
 103
 104 # Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs,
 105 # where i and j are selected at random from the candidates list.
 106 # Return d.keys() after filling.
 107
 108 def fill_dict(d, candidates, numentries):
 109     d.clear()
 110     for i in xrange(numentries):
 111         d[Horrid(random.choice(candidates))] = \
 112             Horrid(random.choice(candidates))
 113     return d.keys()
 114
 115 # Test one pair of randomly generated dicts, each with n entries.
 116 # Note that dict comparison is trivial if they don't have the same number
 117 # of entries (then the "shorter" dict is instantly considered to be the
 118 # smaller one, without even looking at the entries).
 119
 120 def test_one(n):
 121     global mutate, dict1, dict2, dict1keys, dict2keys
 122
 123     # Fill the dicts without mutating them.
 124     mutate = 0
 125     dict1keys = fill_dict(dict1, range(n), n)
 126     dict2keys = fill_dict(dict2, range(n), n)
 127
 128     # Enable mutation, then compare the dicts so long as they have the
 129     # same size.
 130     mutate = 1
 131     if verbose:
 132         print "trying w/ lengths", len(dict1), len(dict2),
 133     while dict1 and len(dict1) == len(dict2):
 134         if verbose:
 135             print ".",
 136         c = cmp(dict1, dict2)
 137     if verbose:
 138         print
 139
 140 # Run test_one n times.  At the start (before the bugs were fixed), 20
 141 # consecutive runs of this test each blew up on or before the sixth time
 142 # test_one was run.  So n doesn't have to be large to get an interesting
 143 # test.
 144 # OTOH, calling with large n is also interesting, to ensure that the fixed
 145 # code doesn't hold on to refcounts *too* long (in which case memory would
 146 # leak).
 147
 148 def test(n):
 149     for i in xrange(n):
 150         test_one(random.randrange(1, 100))
 151
# Run the test when this module is executed or imported.  See the comment
# block above test() for clues about good values for n.
test(100)