3 from __future__
import absolute_import
, division
, print_function
13 class DeltaAlgorithm(object):
17 def test(self
, changes
):
22 def getTestResult(self
, changes
):
23 # There is no reason to cache successful tests because we will
24 # always reduce the changeset when we see one.
26 changeset
= frozenset(changes
)
27 if changeset
in self
.cache
:
29 elif not self
.test(changes
):
30 self
.cache
.add(changeset
)
35 def run(self
, changes
, force
=False):
36 # Make sure the initial test passes, if not then (a) either
37 # the user doesn't expect monotonicity, and we may end up
38 # doing O(N^2) tests, or (b) the test is wrong. Avoid the
39 # O(N^2) case unless user requests it.
41 if not self
.getTestResult(changes
):
42 raise ValueError("Initial test passed to delta fails.")
44 # Check empty set first to quickly find poor test functions.
45 if self
.getTestResult(set()):
48 return self
.delta(changes
, self
.split(changes
))
51 """split(set) -> [sets]
53 Partition a set into one or two pieces.
56 # There are many ways to split, we could do a better job with more
57 # context information (but then the API becomes grosser).
63 return L
[:mid
], L
[mid
:]
65 def delta(self
, c
, sets
):
66 # assert(reduce(set.union, sets, set()) == c)
68 # If there is nothing left we can remove, we are done.
72 # Look for a passing subset.
73 res
= self
.search(c
, sets
)
77 # Otherwise, partition sets if possible; if not we are done.
78 refined
= sum(map(list, map(self
.split
, sets
)), [])
79 if len(refined
) == len(sets
):
82 return self
.delta(c
, refined
)
84 def search(self
, c
, sets
):
85 for i
, S
in enumerate(sets
):
86 # If test passes on this subset alone, recurse.
87 if self
.getTestResult(S
):
88 return self
.delta(S
, self
.split(S
))
90 # Otherwise if we have more than two sets, see if test
91 # pases without this subset.
93 complement
= sum(sets
[:i
] + sets
[i
+ 1 :], [])
94 if self
.getTestResult(complement
):
95 return self
.delta(complement
, sets
[:i
] + sets
[i
+ 1 :])
102 def __init__(self
, type, data
, flags
, file, line
, column
):
111 kTokenRE
= re
.compile(
112 r
"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""", re
.DOTALL | re
.MULTILINE
117 p
= subprocess
.Popen(
118 ["clang", "-dump-raw-tokens", path
],
119 stdin
=subprocess
.PIPE
,
120 stdout
=subprocess
.PIPE
,
121 stderr
=subprocess
.PIPE
,
123 out
, err
= p
.communicate()
127 for ln
in err
.split("\n"):
128 # Silly programmers refuse to print in simple machine readable
133 collect
= collect
+ "\n" + ln
134 if "Loc=<" in ln
and ln
.endswith(">"):
135 ln
, collect
= collect
, None
136 tokens
.append(Token(*kTokenRE
.match(ln
).groups()))
144 class TMBDDelta(DeltaAlgorithm
):
145 def __init__(self
, testProgram
, tokenLists
, log
):
146 def patchName(name
, suffix
):
147 base
, ext
= os
.path
.splitext(name
)
148 return base
+ "." + suffix
+ ext
150 super(TMBDDelta
, self
).__init
__()
151 self
.testProgram
= testProgram
152 self
.tokenLists
= tokenLists
153 self
.tempFiles
= [patchName(f
, "tmp") for f
, _
in self
.tokenLists
]
154 self
.targetFiles
= [patchName(f
, "ok") for f
, _
in self
.tokenLists
]
158 def writeFiles(self
, changes
, fileNames
):
159 assert len(fileNames
) == len(self
.tokenLists
)
160 byFile
= [[] for i
in self
.tokenLists
]
164 for i
, (file, tokens
) in enumerate(self
.tokenLists
):
165 f
= open(fileNames
[i
], "w")
172 def test(self
, changes
):
175 byFile
= self
.writeFiles(changes
, self
.tempFiles
)
178 print("TEST - ", end
=" ", file=sys
.stderr
)
180 for i
, (file, _
) in enumerate(self
.tokenLists
):
183 sys
.stderr
.write("\n ")
184 sys
.stderr
.write("%s:%d tokens: [" % (file, len(byFile
[i
])))
187 if prev
is None or j
!= prev
+ 1:
189 sys
.stderr
.write("%d][" % prev
)
190 sys
.stderr
.write(str(j
))
191 sys
.stderr
.write(":")
194 sys
.stderr
.write(str(byFile
[i
][-1]))
195 sys
.stderr
.write("] ")
200 "%s:%d tokens" % (file, len(byFile
[i
]))
201 for i
, (file, _
) in enumerate(self
.tokenLists
)
208 p
= subprocess
.Popen([self
.testProgram
] + self
.tempFiles
)
212 self
.writeFiles(changes
, self
.targetFiles
)
215 print("=> %s" % res
, file=sys
.stderr
)
218 print("\nSUCCESS (%d tokens)" % len(changes
))
220 sys
.stderr
.write(".")
225 res
= super(TMBDDelta
, self
).run(
228 for i
, (file, tokens
) in enumerate(self
.tokenLists
)
229 for j
in range(len(tokens
))
232 self
.writeFiles(res
, self
.targetFiles
)
234 print(file=sys
.stderr
)
238 def tokenBasedMultiDelta(program
, files
, log
):
239 # Read in the lists of tokens.
240 tokenLists
= [(file, [t
.data
for t
in getTokens(file)]) for file in files
]
242 numTokens
= sum([len(tokens
) for _
, tokens
in tokenLists
])
243 print("Delta on %s with %d tokens." % (", ".join(files
), numTokens
))
245 tbmd
= TMBDDelta(program
, tokenLists
, log
)
250 "Finished %s with %d tokens (in %d tests)."
251 % (", ".join(tbmd
.targetFiles
), len(res
), tbmd
.numTests
)
256 from optparse
import OptionParser
, OptionGroup
258 parser
= OptionParser("%prog <test program> {files+}")
263 help="set debug level [default %default]",
268 (opts
, args
) = parser
.parse_args()
271 parser
.error("Invalid number of arguments.")
273 program
, files
= args
[0], args
[1:]
275 md
= tokenBasedMultiDelta(program
, files
, log
=opts
.debugLevel
)
278 if __name__
== "__main__":
281 except KeyboardInterrupt:
282 print("Interrupted.", file=sys
.stderr
)
283 os
._exit
(1) # Avoid freeing our giant cache.