[llvm-exegesis][NFC] Improve parsing of the YAML files
[llvm-core.git] / utils / benchmark / tools / strip_asm.py
blob9030550b43bece88d41bac337e26004a109232d5
1 #!/usr/bin/env python
3 """
4 strip_asm.py - Cleanup ASM output for the specified file
5 """
7 from argparse import ArgumentParser
8 import sys
9 import os
10 import re
def find_used_labels(asm):
    """Return the set of local labels that are targets of jump instructions.

    Each line of ``asm`` is matched from its start against: optional
    whitespace, a mnemonic of the form ``j<lowercase letters>`` (for example
    ``jmp`` or ``jne``), whitespace, and a ``.L``-prefixed label name.

    Args:
        asm: Assembly text, possibly spanning multiple lines.

    Returns:
        A set of label names, each including the leading ``.L``.
    """
    # Raw string: "\s" and "\." inside a plain literal are invalid escape
    # sequences, which newer Python versions warn about.
    jump_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    matches = (jump_re.match(line) for line in asm.splitlines())
    return {'.L%s' % m.group(1) for m in matches if m}
def normalize_labels(asm):
    """Rewrite dot-less local labels (``L...``) to the dotted form ``.L...``.

    Label declarations are lines that start with ``L<name>:`` or
    ``.L<name>:``. For every declared label that lacks the leading dot, each
    occurrence of that label in ``asm`` that is preceded by the start of the
    text or whitespace, and followed by ``:``, whitespace, or the end of the
    text, gets a ``.`` prefix. Labels that already carry the dot are left
    untouched.

    Args:
        asm: Assembly text, possibly spanning multiple lines.

    Returns:
        The assembly text with all declared dot-less labels dotted. The
        input is returned unchanged when no dot-less label is declared.
    """
    label_decl = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    decls = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m:
            decls.add(m.group(0))

    # Decide per label rather than by inspecting one arbitrary element of the
    # set: with mixed dotted/dot-less declarations the outcome must not depend
    # on set iteration order, and dotted labels must never gain a second dot.
    dotless = sorted(d for d in decls if not d.startswith('.'))
    for label in dotless:
        # The lookahead keeps "L1" from matching inside "L10"; "$" also
        # covers a reference at the very end of the text.
        pattern = r"(^|\s+)" + re.escape(label) + r"(?=:|\s|$)"
        asm = re.sub(pattern, r"\g<1>." + label, asm)
    return asm
def transform_labels(asm):
    """Normalize local labels and drop declarations of unused ones.

    The text is first passed through ``normalize_labels``. Then every line
    that starts with a ``.L<name>:`` declaration is removed unless that label
    is in the set returned by ``find_used_labels``. All other lines are kept.

    Args:
        asm: Assembly text, possibly spanning multiple lines.

    Returns:
        The filtered assembly text; every kept line is terminated by a
        newline.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    # Raw string: "\." in a plain literal is an invalid escape sequence.
    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    kept = []
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m is None or m.group(0) in used_decls:
            kept.append(line)
    # Join once instead of growing a string with repeated "+=".
    return ''.join(line + '\n' for line in kept)
def is_identifier(tk):
    """Return True if ``tk`` is a non-empty identifier-like token.

    The first character must be alphabetic or an underscore; every following
    character must be alphanumeric or an underscore.
    """
    if not tk:
        return False
    head, tail = tk[0], tk[1:]
    if not (head.isalpha() or head == '_'):
        return False
    return all(ch.isalnum() or ch == '_' for ch in tail)
def process_identifiers(l):
    """
    process_identifiers - rewrite the identifiers on a line so that they are
    spelled consistently across platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that underscore.
    """
    def drop_extra_underscore(token):
        # Separators and tokens that are not identifiers pass through as-is.
        if not is_identifier(token):
            return token
        # "__Z..." loses one leading underscore.
        if token.startswith('__Z'):
            return token[1:]
        # "_x..." loses its underscore when x is a letter other than 'Z'.
        if (token.startswith('_') and len(token) > 1
                and token[1].isalpha() and token[1] != 'Z'):
            return token[1:]
        return token

    # The capturing group makes re.split keep the word-like runs, so joining
    # the pieces reproduces the line with only the identifiers rewritten.
    tokens = re.split(r'([a-zA-Z0-9_]+)', l)
    return ''.join(drop_extra_underscore(tk) for tk in tokens)
def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    The text is first run through ``transform_labels``. Each resulting line
    then has the ``@GOTPCREL`` attribute removed and is dropped if it matches
    any of the discard patterns, unless it also matches a keep pattern. Lines
    that survive are passed through ``process_identifiers``. A blank line is
    inserted before every function label definition except when nothing has
    been emitted yet.

    Args:
        asm: Assembly text, possibly spanning multiple lines.

    Returns:
        The stripped assembly text; every emitted line is terminated by a
        newline.
    """
    asm = transform_labels(asm)

    # TODO: Add more things we want to remove
    # Raw strings: "\s" and "\." in plain literals are invalid escape
    # sequences, which newer Python versions warn about.
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
        re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
    ]
    # NOTE(review): no keep patterns are visible in this view of the file, so
    # the list is taken to be empty - confirm against the original source.
    keep_regexes = []
    fn_label_def = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")

    pieces = []
    for line in asm.splitlines():
        # Remove Mach-O attribute
        line = line.replace('@GOTPCREL', '')
        discard = any(reg.match(line) for reg in discard_regexes)
        keep = any(reg.match(line) for reg in keep_regexes)
        # A keep pattern overrides a discard pattern.
        if discard and not keep:
            continue
        # Separate functions with a blank line, but never lead with one.
        if fn_label_def.match(line) and pieces:
            pieces.append('\n')
        pieces.append(process_identifiers(line))
        pieces.append('\n')
    # Join once instead of growing a string with repeated "+=".
    return ''.join(pieces)
def main():
    """Command-line entry point: strip an assembly file.

    Takes two positional arguments, the input assembly file and the output
    file; unrecognized extra arguments are ignored. If the input path is not
    an existing file, an error message is printed and the process exits with
    status 1. Otherwise the input is read, passed through ``process_asm``,
    and the result is written to the output path.
    """
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    # parse_known_args tolerates extra arguments; they are discarded.
    args, _ = parser.parse_known_args()
    src_path = args.input[0]
    dst_path = args.out[0]
    if not os.path.isfile(src_path):
        print("ERROR: input file '%s' does not exist" % src_path)
        sys.exit(1)
    with open(src_path, 'r') as src:
        raw_asm = src.read()
    stripped_asm = process_asm(raw_asm)
    with open(dst_path, 'w') as dst:
        dst.write(stripped_asm)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
149 # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
150 # kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
151 # kate: indent-mode python; remove-trailing-spaces modified;