Teach symstore more duplicated DLLs
[LibreOffice.git] / bin / find-most-common-warn-messages.py
blobdc2ecf8ab0148ae7f9dc3eb4614965429ec35b3a
#!/usr/bin/python3

# A script to search our test logs and sort the messages by how common they are so we can start to
# reduce the noise a little.

import sys
import re
import io
import subprocess
from collections import Counter

# Rough shell equivalent of what this script computes:
# find . -name '*.log' | xargs grep -h 'warn:' | sort | uniq -c | sort -n --field-separator=: --key=5,6

# Shell pipeline that emits every 'warn:' line found in the logs below workdir.
WARN_LINES_COMMAND = "find workdir -name '*.log' | xargs grep -h 'warn:' | sort"

# How many of the most frequent warnings are printed by default.
TOP_COUNT = 20


def parse_warn_line(line):
    """Split one log line into its source location and message text.

    A sample line is:
    warn:sw:18790:1:sw/source/core/doc/DocumentRedlineManager.cxx:98: redline table corrupted: overlapping redlines

    Returns a tuple (sourceAndLine, message), e.g.
    ("sw/source/core/doc/DocumentRedlineManager.cxx:98",
     " redline table corrupted: overlapping redlines"),
    or None when the line has fewer than seven colon-separated fields.
    """
    # Limit the split to six cuts so that colons inside the message stay in
    # the last field; this avoids searching the line for the message text,
    # which picks the wrong position when that text also occurs earlier.
    tokens = line.strip().split(":", 6)
    if len(tokens) < 7:
        return None
    return tokens[4] + ":" + tokens[5], tokens[6]


def count_warnings(lines):
    """Count warnings per source location.

    lines is any iterable of log lines. Returns a tuple
    (messages, sampleOfMessage): messages maps sourceAndLine -> count and
    sampleOfMessage maps sourceAndLine -> message text of the first line seen
    for that location.
    """
    messages = Counter()      # sourceAndLine -> count
    sampleOfMessage = dict()  # sourceAndLine -> string
    for line in lines:
        parsed = parse_warn_line(line)
        if parsed is None:
            # grep matches any line containing 'warn:'; lines without the
            # expected field layout carry no source location, so skip them
            # instead of aborting the whole run with an IndexError.
            continue
        sourceAndLine, message = parsed
        messages[sourceAndLine] += 1
        # Keep only the first message seen for each location as its sample.
        sampleOfMessage.setdefault(sourceAndLine, message)
    return messages, sampleOfMessage


def format_report(messages, sampleOfMessage, top=TOP_COUNT):
    """Build the report as a list of output lines.

    The header and an empty line come first, followed by at most top entries
    sorted by ascending count, so the most common warning is the last line.
    """
    report = ["The top %d warnings" % top, ""]
    # sorted() is stable, so locations with equal counts keep the order in
    # which they were first seen.
    ranked = sorted(messages.items(), key=lambda item: item[1])
    # ranked[-0:] would select the whole list, so handle top <= 0 explicitly.
    selected = ranked[-top:] if top > 0 else []
    for sourceAndLine, count in selected:
        report.append("%6d %s %s" % (count, sourceAndLine, sampleOfMessage[sourceAndLine]))
    return report


def main():
    """Collect the warn lines from the logs and print the most common ones."""
    # The context manager closes the pipe and waits for the shell to exit.
    with subprocess.Popen(WARN_LINES_COMMAND, shell=True, stdout=subprocess.PIPE,
                          universal_newlines=True) as process:
        messages, sampleOfMessage = count_warnings(process.stdout)
    for line in format_report(messages, sampleOfMessage):
        print(line)


if __name__ == "__main__":
    main()