[clang-format] Fix a bug in aligning comments above PPDirective (#72791)
[llvm-project.git] / clang / utils / token-delta.py
blob7c2375c03013f8975a250797a97d9da79adce4fd
1 #!/usr/bin/env python
3 from __future__ import absolute_import, division, print_function
4 import os
5 import re
6 import subprocess
7 import sys
8 import tempfile
10 ###
13 class DeltaAlgorithm(object):
14 def __init__(self):
15 self.cache = set()
17 def test(self, changes):
18 abstract
20 ###
22 def getTestResult(self, changes):
23 # There is no reason to cache successful tests because we will
24 # always reduce the changeset when we see one.
26 changeset = frozenset(changes)
27 if changeset in self.cache:
28 return False
29 elif not self.test(changes):
30 self.cache.add(changeset)
31 return False
32 else:
33 return True
35 def run(self, changes, force=False):
36 # Make sure the initial test passes, if not then (a) either
37 # the user doesn't expect monotonicity, and we may end up
38 # doing O(N^2) tests, or (b) the test is wrong. Avoid the
39 # O(N^2) case unless user requests it.
40 if not force:
41 if not self.getTestResult(changes):
42 raise ValueError("Initial test passed to delta fails.")
44 # Check empty set first to quickly find poor test functions.
45 if self.getTestResult(set()):
46 return set()
47 else:
48 return self.delta(changes, self.split(changes))
50 def split(self, S):
51 """split(set) -> [sets]
53 Partition a set into one or two pieces.
54 """
56 # There are many ways to split, we could do a better job with more
57 # context information (but then the API becomes grosser).
58 L = list(S)
59 mid = len(L) // 2
60 if mid == 0:
61 return (L,)
62 else:
63 return L[:mid], L[mid:]
65 def delta(self, c, sets):
66 # assert(reduce(set.union, sets, set()) == c)
68 # If there is nothing left we can remove, we are done.
69 if len(sets) <= 1:
70 return c
72 # Look for a passing subset.
73 res = self.search(c, sets)
74 if res is not None:
75 return res
77 # Otherwise, partition sets if possible; if not we are done.
78 refined = sum(map(list, map(self.split, sets)), [])
79 if len(refined) == len(sets):
80 return c
82 return self.delta(c, refined)
84 def search(self, c, sets):
85 for i, S in enumerate(sets):
86 # If test passes on this subset alone, recurse.
87 if self.getTestResult(S):
88 return self.delta(S, self.split(S))
90 # Otherwise if we have more than two sets, see if test
91 # pases without this subset.
92 if len(sets) > 2:
93 complement = sum(sets[:i] + sets[i + 1 :], [])
94 if self.getTestResult(complement):
95 return self.delta(complement, sets[:i] + sets[i + 1 :])
98 ###
101 class Token(object):
102 def __init__(self, type, data, flags, file, line, column):
103 self.type = type
104 self.data = data
105 self.flags = flags
106 self.file = file
107 self.line = line
108 self.column = column
111 kTokenRE = re.compile(
112 r"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""", re.DOTALL | re.MULTILINE
116 def getTokens(path):
117 p = subprocess.Popen(
118 ["clang", "-dump-raw-tokens", path],
119 stdin=subprocess.PIPE,
120 stdout=subprocess.PIPE,
121 stderr=subprocess.PIPE,
123 out, err = p.communicate()
125 tokens = []
126 collect = None
127 for ln in err.split("\n"):
128 # Silly programmers refuse to print in simple machine readable
129 # formats. Whatever.
130 if collect is None:
131 collect = ln
132 else:
133 collect = collect + "\n" + ln
134 if "Loc=<" in ln and ln.endswith(">"):
135 ln, collect = collect, None
136 tokens.append(Token(*kTokenRE.match(ln).groups()))
138 return tokens
144 class TMBDDelta(DeltaAlgorithm):
145 def __init__(self, testProgram, tokenLists, log):
146 def patchName(name, suffix):
147 base, ext = os.path.splitext(name)
148 return base + "." + suffix + ext
150 super(TMBDDelta, self).__init__()
151 self.testProgram = testProgram
152 self.tokenLists = tokenLists
153 self.tempFiles = [patchName(f, "tmp") for f, _ in self.tokenLists]
154 self.targetFiles = [patchName(f, "ok") for f, _ in self.tokenLists]
155 self.log = log
156 self.numTests = 0
158 def writeFiles(self, changes, fileNames):
159 assert len(fileNames) == len(self.tokenLists)
160 byFile = [[] for i in self.tokenLists]
161 for i, j in changes:
162 byFile[i].append(j)
164 for i, (file, tokens) in enumerate(self.tokenLists):
165 f = open(fileNames[i], "w")
166 for j in byFile[i]:
167 f.write(tokens[j])
168 f.close()
170 return byFile
172 def test(self, changes):
173 self.numTests += 1
175 byFile = self.writeFiles(changes, self.tempFiles)
177 if self.log:
178 print("TEST - ", end=" ", file=sys.stderr)
179 if self.log > 1:
180 for i, (file, _) in enumerate(self.tokenLists):
181 indices = byFile[i]
182 if i:
183 sys.stderr.write("\n ")
184 sys.stderr.write("%s:%d tokens: [" % (file, len(byFile[i])))
185 prev = None
186 for j in byFile[i]:
187 if prev is None or j != prev + 1:
188 if prev:
189 sys.stderr.write("%d][" % prev)
190 sys.stderr.write(str(j))
191 sys.stderr.write(":")
192 prev = j
193 if byFile[i]:
194 sys.stderr.write(str(byFile[i][-1]))
195 sys.stderr.write("] ")
196 else:
197 print(
198 ", ".join(
200 "%s:%d tokens" % (file, len(byFile[i]))
201 for i, (file, _) in enumerate(self.tokenLists)
204 end=" ",
205 file=sys.stderr,
208 p = subprocess.Popen([self.testProgram] + self.tempFiles)
209 res = p.wait() == 0
211 if res:
212 self.writeFiles(changes, self.targetFiles)
214 if self.log:
215 print("=> %s" % res, file=sys.stderr)
216 else:
217 if res:
218 print("\nSUCCESS (%d tokens)" % len(changes))
219 else:
220 sys.stderr.write(".")
222 return res
224 def run(self):
225 res = super(TMBDDelta, self).run(
227 (i, j)
228 for i, (file, tokens) in enumerate(self.tokenLists)
229 for j in range(len(tokens))
232 self.writeFiles(res, self.targetFiles)
233 if not self.log:
234 print(file=sys.stderr)
235 return res
238 def tokenBasedMultiDelta(program, files, log):
239 # Read in the lists of tokens.
240 tokenLists = [(file, [t.data for t in getTokens(file)]) for file in files]
242 numTokens = sum([len(tokens) for _, tokens in tokenLists])
243 print("Delta on %s with %d tokens." % (", ".join(files), numTokens))
245 tbmd = TMBDDelta(program, tokenLists, log)
247 res = tbmd.run()
249 print(
250 "Finished %s with %d tokens (in %d tests)."
251 % (", ".join(tbmd.targetFiles), len(res), tbmd.numTests)
255 def main():
256 from optparse import OptionParser, OptionGroup
258 parser = OptionParser("%prog <test program> {files+}")
259 parser.add_option(
261 "--debug",
262 dest="debugLevel",
263 help="set debug level [default %default]",
264 action="store",
265 type=int,
266 default=0,
268 (opts, args) = parser.parse_args()
270 if len(args) <= 1:
271 parser.error("Invalid number of arguments.")
273 program, files = args[0], args[1:]
275 md = tokenBasedMultiDelta(program, files, log=opts.debugLevel)
278 if __name__ == "__main__":
279 try:
280 main()
281 except KeyboardInterrupt:
282 print("Interrupted.", file=sys.stderr)
283 os._exit(1) # Avoid freeing our giant cache.