cygprofile: increase timeouts to allow showing web contents
[chromium-blink-merge.git] / tools / metrics / histograms / find_unmapped_histograms.py
blobd17870263b63af72304a9073b69d88c882d566d5
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Scans the Chromium source for histograms that are absent from histograms.xml.
7 This is a heuristic scan, so a clean run of this script does not guarantee that
8 all histograms in the Chromium source are properly mapped. Notably, field
9 trials are entirely ignored by this script.
11 """
13 import hashlib
14 import logging
15 import optparse
16 import os
17 import re
18 import subprocess
19 import sys
21 sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
22 import path_util
24 import extract_histograms
# Matches the seam between two adjacent C string literals. Group 1 keeps the
# first literal's opening quotation mark and contents; the closing quotation
# mark, the whitespace and the second literal's opening quotation mark are
# matched outside the group so that a substitution with group 1 drops them.
ADJACENT_C_STRING_REGEX = re.compile(r"""
    ("      # Opening quotation mark
    [^"]*)  # Literal string contents
    "       # Closing quotation mark
    \s*     # Any number of spaces
    "       # Another opening quotation mark
    """, re.VERBOSE)
# Matches an expression made up of nothing but a constant identifier, with an
# optional namespace qualifier in front, when applied with match().
CONSTANT_REGEX = re.compile(r"""
    (\w*::)?  # Optional namespace
    k[A-Z]    # Match a constant identifier: 'k' followed by an uppercase letter
    \w*       # Match the rest of the constant identifier
    $         # Make sure there's only the identifier, nothing else
    """, re.VERBOSE)
# Matches an invocation of a UMA_HISTOGRAM* macro and captures its first
# parameter, i.e. everything up to the first comma or closing parenthesis.
HISTOGRAM_REGEX = re.compile(r"""
    UMA_HISTOGRAM  # Match the shared prefix for standard UMA histogram macros
    \w*            # Match the rest of the macro name, e.g. '_ENUMERATION'
    \(             # Match the opening parenthesis for the macro
    \s*            # Match any whitespace -- especially, any newlines
    ([^,)]*)       # Capture the first parameter to the macro
    [,)]           # Match the comma/paren that delineates the first parameter
    """, re.VERBOSE)
def RunGit(command):
  """Runs a git subcommand and returns its standard output as text.

  Args:
    command: List of arguments to pass to git, e.g. ['status', '--short'].

  Returns:
    The subcommand's stdout with surrounding whitespace stripped. The exit
    status of git is not checked.
  """
  command = ['git'] + command
  logging.info(' '.join(command))
  # On Windows, use shell=True to get PATH interpretation.
  shell = (os.name == 'nt')
  # universal_newlines=True makes the pipe yield text rather than bytes under
  # Python 3 as well, so callers can keep splitting the result on '\n'.
  proc = subprocess.Popen(command, shell=shell, stdout=subprocess.PIPE,
                          universal_newlines=True)
  out = proc.communicate()[0].strip()
  return out
class DirectoryNotFoundException(Exception):
  """Base class to distinguish locally defined exceptions from standard ones.

  Attributes:
    msg: Human-readable description of the failure; str() returns it as is.
  """

  def __init__(self, msg):
    # Initialize the Exception base class with the message so that `args` is
    # populated regardless of the Python version in use.
    super(DirectoryNotFoundException, self).__init__(msg)
    self.msg = msg

  def __str__(self):
    return self.msg
def collapseAdjacentCStrings(string):
  """Merges neighbouring C string literals into one literal.

  Long names are often split over several source lines to stay within the
  80-column limit; this glues the pieces back together.

  Args:
    string: The text to recombine, for instance two quoted pieces such as
        "Foo" and "bar" separated only by whitespace.

  Returns:
    The text with every pair of adjacent literals joined, so that the example
    above becomes the single literal "Foobar".
  """
  merged = string
  while True:
    # Join one pair per pass: a single global substitution would miss a third
    # literal, because the scan resumes inside the literal it just joined.
    merged, replaced = ADJACENT_C_STRING_REGEX.subn(r'\1', merged, count=1)
    if not replaced:
      return merged
def logNonLiteralHistogram(filename, histogram):
  """Warns about a histogram name in the Chromium source that is not a literal.

  Expressions that are known to be acceptable are skipped silently.

  Args:
    filename: Path of the file in which the histogram was found, e.g.
        'chrome/browser/memory_details.cc'
    histogram: The expression that evaluates to the histogram's name, e.g.
        '"FakeHistogram" + variant'

  Returns:
    None
  """
  # A backslash typically comes from a histogram macro formatted across
  # several lines, and a bare constant identifier means the name has been
  # pulled out into a C++ constant. Neither is reported.
  if '\\' in histogram or CONSTANT_REGEX.match(histogram):
    return

  # TODO(isherman): This is still a little noisy... needs further filtering to
  # reduce the noise.
  logging.warning('%s contains non-literal histogram name <%s>', filename,
                  histogram)
def readChromiumHistograms():
  """Searches the Chromium source for all histogram names.

  Also prints warnings for any invocations of the UMA_HISTOGRAM_* macros with
  names that might vary during a single run of the app.

  Returns:
    A set containing any found literal histogram names.
  """
  logging.info('Scanning Chromium source for histograms...')

  # Use git grep to find all invocations of the UMA_HISTOGRAM_* macros.
  # Examples:
  # 'path/to/foo.cc:420: UMA_HISTOGRAM_COUNTS_100("FooGroup.FooName",'
  # 'path/to/bar.cc:632: UMA_HISTOGRAM_ENUMERATION('
  # NOTE(review): 'gs' is presumably a git alias/wrapper around grep that has
  # to be available in the environment -- confirm.
  locations = RunGit(['gs', 'UMA_HISTOGRAM']).split('\n')

  # Keep only non-empty filenames. When git prints nothing the split above
  # yields [''], and trying to open '' would abort the whole scan.
  filenames = set()
  for location in locations:
    filename = location.split(':')[0]
    if filename:
      filenames.add(filename)

  histograms = set()
  for filename in filenames:
    with open(filename, 'r') as f:
      contents = f.read()

    for histogram in set(HISTOGRAM_REGEX.findall(contents)):
      histogram = collapseAdjacentCStrings(histogram)

      # Must begin and end with a quotation mark. The length check also
      # rejects a lone '"', which would otherwise count as both ends.
      if len(histogram) < 2 or histogram[0] != '"' or histogram[-1] != '"':
        logNonLiteralHistogram(filename, histogram)
        continue

      # Must not include any quotation marks other than at the beginning or
      # end.
      histogram_stripped = histogram.strip('"')
      if '"' in histogram_stripped:
        logNonLiteralHistogram(filename, histogram)
        continue

      histograms.add(histogram_stripped)

  return histograms
def readXmlHistograms(histograms_file_location):
  """Parses all histogram names from histograms.xml.

  Args:
    histograms_file_location: Path of the XML file, passed on to
        extract_histograms.ExtractHistograms.

  Returns:
    A set containing the parsed histogram names.
  """
  # Pass the path as a logging argument, like the other logging calls in this
  # file, so that the message is only formatted when it is actually emitted.
  logging.info('Reading histograms from %s...', histograms_file_location)
  histograms = extract_histograms.ExtractHistograms(histograms_file_location)
  return set(extract_histograms.ExtractNames(histograms))
def hashHistogramName(name):
  """Computes the hash of a histogram name.

  Args:
    name: The string to hash (a histogram name). Text is encoded as UTF-8
        before hashing; byte strings are hashed unchanged.

  Returns:
    Histogram hash as a string representing a hex number (with leading 0x),
    made of the first 16 hex digits of the MD5 digest.
  """
  # hashlib only accepts bytes under Python 3, so encode text names first.
  # Byte strings (which includes Python 2 'str') pass through untouched.
  if not isinstance(name, bytes):
    name = name.encode('utf-8')
  return '0x' + hashlib.md5(name).hexdigest()[:16]
def main():
  """Reports histograms used in the Chromium source but missing from the XML.

  Parses the command-line options, changes into the root directory, scans the
  source for literal histogram names and logs every name that is defined in
  neither of the histograms files. Exits with status 1 when unexpected
  positional arguments are given or the root directory cannot be entered.
  """
  # Find default paths.
  default_root = path_util.GetInputFile('/')
  default_histograms_path = path_util.GetInputFile(
      'tools/metrics/histograms/histograms.xml')
  default_extra_histograms_path = path_util.GetInputFile(
      'tools/histograms/histograms.xml')

  # Parse command line options
  parser = optparse.OptionParser()
  parser.add_option(
      '--root-directory', dest='root_directory', default=default_root,
      help='scan within DIRECTORY for histograms [optional, defaults to "%s"]' %
          default_root,
      metavar='DIRECTORY')
  parser.add_option(
      '--histograms-file', dest='histograms_file_location',
      default=default_histograms_path,
      help='read histogram definitions from FILE (relative to --root-directory) '
          '[optional, defaults to "%s"]' % default_histograms_path,
      metavar='FILE')
  # The option used to be available only under the misspelled name
  # '--exrta_histograms-file'; that spelling is kept as an alias so existing
  # invocations keep working.
  parser.add_option(
      '--extra-histograms-file', '--exrta_histograms-file',
      dest='extra_histograms_file_location',
      default=default_extra_histograms_path,
      help='read additional histogram definitions from FILE (relative to '
          '--root-directory) [optional, defaults to "%s"]' %
          default_extra_histograms_path,
      metavar='FILE')

  (options, args) = parser.parse_args()
  if args:
    parser.print_help()
    sys.exit(1)

  logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

  try:
    os.chdir(options.root_directory)
  except EnvironmentError as e:
    logging.error("Could not change to root directory: %s", e)
    sys.exit(1)
  chromium_histograms = readChromiumHistograms()
  xml_histograms = readXmlHistograms(options.histograms_file_location)
  unmapped_histograms = chromium_histograms - xml_histograms

  # The extra histograms file is optional: a missing file only gets a warning.
  if os.path.isfile(options.extra_histograms_file_location):
    xml_histograms2 = readXmlHistograms(options.extra_histograms_file_location)
    unmapped_histograms -= xml_histograms2
  else:
    logging.warning('No such file: %s', options.extra_histograms_file_location)

  if unmapped_histograms:
    logging.info('')
    logging.info('')
    logging.info('Histograms in Chromium but not in XML files:')
    logging.info('-------------------------------------------------')
    for histogram in sorted(unmapped_histograms):
      logging.info(' %s - %s', histogram, hashHistogramName(histogram))
  else:
    logging.info('Success! No unmapped histograms found.')
# Run the scan only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()