bump product version to 7.2.5.1
[LibreOffice.git] / compilerplugins / clang / pahole-all-classes.py
blob6037287a82ca3734502d42430d9b23d52d3b2b46
1 #!/usr/bin/python3
3 # Find holes in structures, so that we can pack them and improve our memory density.
5 # In order to make this work, you need to
6 # (1) Be operating in a workspace where you have a __NON-DEBUG__ build of LibreOffice, but __WITH SYMBOLS__.
7 # (A debug build has different sizes for some things in the standard library.)
8 # (2) First run the unusedfields loplugin to generate a log file
9 # (3) Install the pahole stuff into your gdb, I used this one:
10 # https://github.com/PhilArmstrong/pahole-gdb
11 # (4) Run the script
12 # ./compilerplugins/clang/pahole-all-classes.py
15 import _thread
16 import io
17 import os
18 import subprocess
19 import time
20 import re
22 # search for all the class names in the file produced by the unusedfields loplugin
23 #a = subprocess.Popen("grep 'definition:' workdir/loplugin.unusedfields.log | sort -u", stdout=subprocess.PIPE, shell=True)
# Read the tab-separated field records and remember, for every class seen,
# where it is defined (and the reverse mapping from location to class).
# NOTE(review): the input currently comes from a local file called "n1" rather
# than from workdir/loplugin.unusedfields.log - this looks like a debugging
# leftover, confirm before relying on it.
a = subprocess.Popen("cat n1", stdout=subprocess.PIPE, shell=True)

classSet = set()
classSourceLocDict = {}
locToClassDict = {}
with a.stdout as txt:
    for rawLine in txt:
        fields = rawLine.decode('utf8').strip().split("\t")
        # column 2 holds the class name, column 5 its source location
        className = fields[2].strip()
        srcLoc = fields[5].strip()
        # skip things like unions, and classes we have already recorded
        if "anonymous" in className or className in classSet:
            continue
        classSet.add(className)
        classSourceLocDict[className] = srcLoc
        locToClassDict[srcLoc] = className
a.terminate()
43 # Some of the pahole commands are going to fail, and I cannot read the error stream and the input stream
44 # together because python has no way of (easily) doing a non-blocking read.
45 # So I have to write the commands out using a background thread, and then read the entire resulting
46 # stream out below.
def write_pahole_commands(classes):
    """Send gdb one echo/pahole command pair per class, then an end marker.

    The commands are written to the module-level ``stdin`` text wrapper of the
    current gdb process. For every class an ``echo`` of its name and source
    location (looked up in ``classSourceLocDict``) is emitted ahead of the
    ``pahole`` command, so the reader can see which class the following
    output belongs to. After the last class an ``echo all-done`` marker is
    written and the stream is closed.

    classes -- iterable of class names, each a key of ``classSourceLocDict``.
    """
    for name in classes:
        location = classSourceLocDict[name]
        stdin.write("echo {} {}\n".format(name, location))
        stdin.write("pahole {}\n".format(name))
        stdin.flush()
    stdin.write("echo all-done\n")
    stdin.flush()
    # closing the stream is the only way to make it flush the last echo command
    stdin.close()
56 # Use generator because lines often end up merged together in gdb's output, and we need
57 # to split them up, and that creates a mess in the parsing logic.
def read_generator(gdbOutput):
    """Yield the non-empty pieces of gdb's output, one logical line at a time.

    gdb frequently glues several responses onto one physical line, separated
    by its "(gdb)" prompt, so every line read is split on that prompt and each
    non-blank piece is yielded separately (whitespace-stripped). Every
    physical line is also echoed to stdout with a "gdb: " prefix.

    gdbOutput -- binary stream (e.g. the stdout pipe of the gdb process);
                 its lines are decoded as UTF-8.

    The generator finishes when a piece containing "all-done" is seen, or
    when the stream reaches end of file.
    """
    while True:
        rawLine = gdbOutput.readline()
        # readline() on a binary stream returns b"" at end of file; comparing
        # against the str "" never matches and would loop forever, so test
        # for emptiness instead.
        if not rawLine:
            return
        text = rawLine.decode('utf8').strip()
        print("gdb: " + text)
        for piece in text.split("(gdb)"):
            piece = piece.strip()
            if not piece:
                continue
            if "all-done" in piece:
                return
            yield piece
70 # build list of classes sorted by source location to increase the chances of
71 # processing stuff stored in the same DSO together
# Walk the known source locations in sorted order and keep only the classes
# whose location contains "/inc/" or "include/".
sortedLocs = sorted(locToClassDict)
classList = [
    locToClassDict[loc]
    for loc in sortedLocs
    if "/inc/" in loc or "include/" in loc
]
# Patterns for the interesting lines of pahole's output. They do not depend on
# the batch being processed, so compile them once up front.
firstLineRegex = re.compile(r"/\*\s+(\d+)\s+\*/ struct") # /* 16 */ struct Foo
fieldLineRegex = re.compile(r"/\*\s+(\d+)\s+(\d+)\s+\*/ ") # /* 12 8 */ class rtl::OUString aName
holeLineRegex = re.compile(r"/\* XXX (\d+) bit hole, try to pack \*/")
# sometimes pahole can't determine the size of a sub-struct, and then it returns bad data
bogusLineRegex = re.compile(r"/\*\s+\d+\s+0\s+\*/")

# Only hand a limited number of classes to each gdb instance, otherwise gdb's
# memory usage blows up and kills the machine.
# This number is chosen to make gdb peak at around 8G.
classesPerGdbRun = 500

with open("compilerplugins/clang/pahole.results", "wt") as f:
    while classList:

        currClassList = classList[:classesPerGdbRun]
        classList = classList[classesPerGdbRun:]

        gdbProc = subprocess.Popen("gdb", stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)

        # write_pahole_commands() reaches this wrapper through the module-level name 'stdin'
        stdin = io.TextIOWrapper(gdbProc.stdin, 'utf-8')

        # make gdb load all the debugging info
        stdin.write("set confirm off\n")
        # make gdb not wrap output and mess up my parsing
        stdin.write("set width unlimited\n")
        for filename in sorted(os.listdir('instdir/program')):
            if filename.endswith(".so"):
                stdin.write("add-symbol-file instdir/program/" + filename + "\n")
        stdin.flush()

        # the pahole commands are written from a background thread while this
        # thread consumes gdb's output below
        _thread.start_new_thread( write_pahole_commands, (currClassList,) )

        # per-struct parsing state
        structLines = list()
        foundHole = False
        cumulativeHoleBits = 0
        alignedStructSize = 0
        foundBogusLine = False
        # pahole doesn't report space at the end of the structure, so work it out myself
        sizeOfStructWithoutPadding = 0
        for line in read_generator(gdbProc.stdout):
            structLines.append(line)
            firstLineMatch = firstLineRegex.match(line)
            if firstLineMatch:
                alignedStructSize = int(firstLineMatch.group(1))
                # a new struct starts here: drop whatever was collected before it
                structLines.clear()
                structLines.append(line)
            holeLineMatch = holeLineRegex.match(line)
            if holeLineMatch:
                foundHole = True
                cumulativeHoleBits += int(holeLineMatch.group(1))
            fieldLineMatch = fieldLineRegex.match(line)
            if fieldLineMatch:
                fieldPosInBytes = int(fieldLineMatch.group(1))
                fieldSizeInBytes = int(fieldLineMatch.group(2))
                # after the last field line this is the end of the last field
                sizeOfStructWithoutPadding = fieldPosInBytes + fieldSizeInBytes
            if bogusLineRegex.match(line):
                foundBogusLine = True
            if line == "}":
                # Ignore very large structs, packing those is not going to help much, and
                # re-organising them can make them much less readable.
                if foundHole and len(structLines) < 16 and alignedStructSize < 100 and not foundBogusLine:
                    # Verify that, after packing, and compiler alignment, the new structure will be actually smaller.
                    # Sometimes, we can save space, but the compiler will align the structure such that we don't
                    # actually save any space.
                    # TODO improve detection of the required alignment for a structure
                    holeAtEnd = alignedStructSize - sizeOfStructWithoutPadding
                    potentialSpace = (cumulativeHoleBits / 8) + holeAtEnd
                    if potentialSpace >= 8:
                        for structLine in structLines:
                            f.write(structLine + "\n")
                        if holeAtEnd > 0:
                            f.write("hole at end of struct: " + str(holeAtEnd) + "\n")
                        f.write("\n")
                # reset state, so that nothing leaks from this struct into the next one
                structLines.clear()
                foundHole = False
                cumulativeHoleBits = 0
                alignedStructSize = 0
                foundBogusLine = False
                sizeOfStructWithoutPadding = 0

        gdbProc.terminate()
        # reap the child so that finished gdb runs do not pile up as zombies
        gdbProc.wait()