Battery Status API: add UMA logging for Linux.
[chromium-blink-merge.git] / tools / metrics / histograms / find_unmapped_histograms.py
blob443d1c6fa637413f68099e60d8f146bb65db4506
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Scans the Chromium source for histograms that are absent from histograms.xml.
7 This is a heuristic scan, so a clean run of this script does not guarantee that
8 all histograms in the Chromium source are properly mapped. Notably, field
9 trials are entirely ignored by this script.
11 """
13 import commands
14 import extract_histograms
15 import hashlib
16 import logging
17 import optparse
18 import os
19 import re
20 import sys
# Matches the seam between two adjacent C string literals: the first literal
# (opening quote plus contents) is captured as group 1, followed by its closing
# quote, optional whitespace, and the opening quote of the next literal.
# Substituting group 1 for the match therefore joins the two literals.
ADJACENT_C_STRING_REGEX = re.compile(r"""
    ("      # Opening quotation mark
    [^"]*)  # Literal string contents
    "       # Closing quotation mark
    \s*     # Any number of spaces
    "       # Another opening quotation mark
    """, re.VERBOSE)
# Matches an expression that consists solely of a C++ constant identifier in
# Chromium style ('k' followed by an uppercase letter), optionally qualified by
# any number of namespaces, e.g. 'kFoo', 'content::kFoo' or 'a::b::kFoo'.
# Intended to be used with .match(), which anchors the pattern at the start.
CONSTANT_REGEX = re.compile(r"""
    (\w*::)*  # Optional namespace qualifiers (possibly nested)
    k[A-Z]    # Match a constant identifier: 'k' followed by an uppercase letter
    \w*       # Match the rest of the constant identifier
    $         # Make sure there's only the identifier, nothing else
    """, re.VERBOSE)
# Matches the start of a UMA_HISTOGRAM* macro invocation and captures, as
# group 1, the raw text of its first argument: everything between the opening
# parenthesis (after any leading whitespace) and the first comma. The captured
# text may span several lines and may be any expression, not just a literal.
HISTOGRAM_REGEX = re.compile(r"""
    UMA_HISTOGRAM  # Match the shared prefix for standard UMA histogram macros
    \w*            # Match the rest of the macro name, e.g. '_ENUMERATION'
    \(             # Match the opening parenthesis for the macro
    \s*            # Match any whitespace -- especially, any newlines
    ([^,]*)        # Capture the first parameter to the macro
    ,              # Match the comma that delineates the first parameter
    """, re.VERBOSE)
class DirectoryNotFoundException(Exception):
  """Raised when a required directory (e.g. the src/ root) cannot be found.

  Also serves to distinguish locally defined exceptions from standard ones.

  Attributes:
    msg: The human-readable error message passed to the constructor.
  """

  def __init__(self, msg):
    # Forward the message to Exception so that args, repr() and pickling all
    # carry it; str(exception) then yields the message via the base class.
    super(DirectoryNotFoundException, self).__init__(msg)
    self.msg = msg
def findDefaultRoot():
  """Locates the Chromium src/ directory by walking up from the cwd.

  Starting at the current working directory, each ancestor is examined in turn
  until one whose final path component is 'src' is found. This lets the script
  be launched from the histograms directory (or anywhere else below src/).

  Returns:
    string: path to the src dir of the repo.

  Raises:
    DirectoryNotFoundException if the target directory cannot be found.
  """
  candidate = os.getcwd()
  while candidate:
    if os.path.basename(candidate) == 'src':
      return candidate
    parent = os.path.dirname(candidate)
    if parent == candidate:
      # Reached the filesystem root; there is nowhere further up to look.
      break
    candidate = parent
  raise DirectoryNotFoundException('Could not find src/ dir')
def collapseAdjacentCStrings(string):
  """Merges runs of adjacent C string literals into one literal.

  C concatenates neighbouring string literals, and long histogram names are
  often split across lines to satisfy the 80-col restriction. This undoes that
  split so the name can be examined as a single literal.

  Args:
    string: The string to recombine, e.g. '"Foo"\n "bar"'

  Returns:
    The collapsed string, e.g. "Foobar" for an input of '"Foo"\n "bar"'
  """
  # Join one seam per pass: a seam's trailing quote is also the opening quote
  # of the next literal, so the scan has to restart after every replacement.
  while True:
    string, replaced = ADJACENT_C_STRING_REGEX.subn(r'\1', string, count=1)
    if not replaced:
      return string
def logNonLiteralHistogram(filename, histogram):
  """Warns about a histogram name that is not a plain string literal.

  Known acceptable kinds of non-literal names are filtered out silently.

  Args:
    filename: The filename for the file containing the histogram, e.g.
        'chrome/browser/memory_details.cc'
    histogram: The expression that evaluates to the name of the histogram, e.g.
        '"FakeHistogram" + variant'

  Returns:
    None
  """
  # Histogram macros typically contain backslashes so that they can be
  # formatted across lines; those are definitions, not real invocations.
  inside_macro = '\\' in histogram

  # Field trials are unique within a session, so are effectively constants.
  is_field_trial = histogram.startswith('base::FieldTrial::MakeName')

  # The last check skips names that have been pulled out into C++ constants.
  if inside_macro or is_field_trial or CONSTANT_REGEX.match(histogram):
    return

  # TODO(isherman): This is still a little noisy... needs further filtering to
  # reduce the noise.
  logging.warning('%s contains non-literal histogram name <%s>', filename,
                  histogram)
def readChromiumHistograms():
  """Searches the Chromium source for all histogram names.

  Runs 'git gs UMA_HISTOGRAM' in the current working directory to find files
  that mention the macros, then scans each file for macro invocations.

  Also prints warnings for any invocations of the UMA_HISTOGRAM_* macros with
  names that might vary during a single run of the app.

  Returns:
    A set containing any found literal histogram names.
  """
  logging.info('Scanning Chromium source for histograms...')

  # Use git grep to find all invocations of the UMA_HISTOGRAM_* macros.
  # Examples:
  #   'path/to/foo.cc:420:  UMA_HISTOGRAM_COUNTS_100("FooGroup.FooName",'
  #   'path/to/bar.cc:632:  UMA_HISTOGRAM_ENUMERATION('
  locations = commands.getoutput('git gs UMA_HISTOGRAM').split('\n')
  filenames = set(location.split(':')[0] for location in locations)
  # Empty grep output splits into [''], which must not be opened as a file.
  filenames.discard('')

  histograms = set()
  for filename in filenames:
    with open(filename, 'r') as f:
      contents = f.read()

    for histogram in set(HISTOGRAM_REGEX.findall(contents)):
      # Whitespace between the name and the comma is not part of the name.
      histogram = collapseAdjacentCStrings(histogram).strip()

      # Must begin and end with a quotation mark. The length check also covers
      # an empty first argument, which would otherwise raise on indexing.
      if len(histogram) < 2 or histogram[0] != '"' or histogram[-1] != '"':
        logNonLiteralHistogram(filename, histogram)
        continue

      # Must not include any quotation marks other than at the beginning or end.
      histogram_stripped = histogram.strip('"')
      if '"' in histogram_stripped:
        logNonLiteralHistogram(filename, histogram)
        continue

      histograms.add(histogram_stripped)

  return histograms
def readXmlHistograms(histograms_file_location):
  """Collects the histogram names defined in a histograms XML file.

  Args:
    histograms_file_location: Path of the XML file, handed as-is to
        extract_histograms.ExtractHistograms.

  Returns:
    A set containing the parsed histogram names.
  """
  logging.info('Reading histograms from %s...' % histograms_file_location)
  parsed = extract_histograms.ExtractHistograms(histograms_file_location)
  names = extract_histograms.ExtractNames(parsed)
  return set(names)
def hashHistogramName(name):
  """Computes the hash of a histogram name.

  Args:
    name: The string to hash (a histogram name). Either a byte string or a
        text (unicode) string; text is encoded as UTF-8 before hashing.

  Returns:
    Histogram hash as a string representing a hex number (with leading 0x):
    the first 16 hex digits of the MD5 digest of the name.
  """
  # hashlib only accepts bytes, so encode text input first. A Python 2 str is
  # already bytes and is hashed unchanged.
  if not isinstance(name, bytes):
    name = name.encode('utf-8')
  return '0x' + hashlib.md5(name).hexdigest()[:16]
def main():
  """Reports histograms used in the Chromium source but absent from the XML.

  Parses the command line, changes into the root directory, collects literal
  histogram names from the source and subtracts the names defined in the
  histograms XML file (and in the extra XML file, when it exists). The result
  is written to the log. Exits with status 1 on unexpected positional
  arguments or when the root directory cannot be entered.

  Raises:
    DirectoryNotFoundException: propagated from findDefaultRoot() when no
        src/ directory is found above the current working directory.
  """
  # Find default paths.
  default_root = findDefaultRoot()
  default_histograms_path = os.path.join(
      default_root, 'tools/metrics/histograms/histograms.xml')
  default_extra_histograms_path = os.path.join(
      default_root, 'tools/histograms/histograms.xml')

  # Parse command line options
  parser = optparse.OptionParser()
  parser.add_option(
      '--root-directory', dest='root_directory', default=default_root,
      help='scan within DIRECTORY for histograms [optional, defaults to "%s"]' %
          default_root,
      metavar='DIRECTORY')
  parser.add_option(
      '--histograms-file', dest='histograms_file_location',
      default=default_histograms_path,
      help='read histogram definitions from FILE (relative to --root-directory) '
          '[optional, defaults to "%s"]' % default_histograms_path,
      metavar='FILE')
  # The flag was originally misspelled; the old spelling is kept as an alias so
  # that existing invocations continue to work.
  parser.add_option(
      '--extra-histograms-file', '--exrta_histograms-file',
      dest='extra_histograms_file_location',
      default=default_extra_histograms_path,
      help='read additional histogram definitions from FILE (relative to '
          '--root-directory) [optional, defaults to "%s"]' %
          default_extra_histograms_path,
      metavar='FILE')

  (options, args) = parser.parse_args()
  if args:
    parser.print_help()
    sys.exit(1)

  logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

  try:
    os.chdir(options.root_directory)
  except EnvironmentError as e:
    logging.error("Could not change to root directory: %s", e)
    sys.exit(1)
  chromium_histograms = readChromiumHistograms()
  xml_histograms = readXmlHistograms(options.histograms_file_location)
  unmapped_histograms = chromium_histograms - xml_histograms

  # The extra file is optional: warn and carry on when it is missing.
  if os.path.isfile(options.extra_histograms_file_location):
    xml_histograms2 = readXmlHistograms(options.extra_histograms_file_location)
    unmapped_histograms -= xml_histograms2
  else:
    logging.warning('No such file: %s', options.extra_histograms_file_location)

  if unmapped_histograms:
    logging.info('')
    logging.info('')
    logging.info('Histograms in Chromium but not in XML files:')
    logging.info('-------------------------------------------------')
    for histogram in sorted(unmapped_histograms):
      logging.info('  %s - %s', histogram, hashHistogramName(histogram))
  else:
    logging.info('Success!  No unmapped histograms found.')
# Run the scan only when executed as a script, not when imported as a module.
if __name__ == '__main__':
  main()