1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Scans the Chromium source for histograms that are absent from histograms.xml.
7 This is a heuristic scan, so a clean run of this script does not guarantee that
8 all histograms in the Chromium source are properly mapped. Notably, field
9 trials are entirely ignored by this script.
21 sys
.path
.append(os
.path
.join(os
.path
.dirname(__file__
), '..', 'common'))
24 import extract_histograms
# All three patterns are written in verbose mode, so whitespace and the
# trailing '#' comments inside them are ignored by the regex engine.

# Matches the seam between two adjacent C string literals, capturing the
# first literal up to (but not including) its closing quotation mark.
ADJACENT_C_STRING_REGEX = re.compile(
    r"""
    ("      # Opening quotation mark
    [^"]*)  # Literal string contents
    "       # Closing quotation mark
    \s*     # Any number of spaces
    "       # Another opening quotation mark
    """,
    re.VERBOSE)

# Matches an expression that is nothing but a C++ constant identifier.
CONSTANT_REGEX = re.compile(
    r"""
    (\w*::)?  # Optional namespace
    k[A-Z]    # Match a constant identifier: 'k' followed by an uppercase letter
    \w*       # Match the rest of the constant identifier
    $         # Make sure there's only the identifier, nothing else
    """,
    re.VERBOSE)

# Matches a UMA_HISTOGRAM* macro invocation, capturing its first argument.
HISTOGRAM_REGEX = re.compile(
    r"""
    UMA_HISTOGRAM  # Match the shared prefix for standard UMA histogram macros
    \w*            # Match the rest of the macro name, e.g. '_ENUMERATION'
    \(             # Match the opening parenthesis for the macro
    \s*            # Match any whitespace -- especially, any newlines
    ([^,)]*)       # Capture the first parameter to the macro
    [,)]           # Match the comma/paren that delineates the first parameter
    """,
    re.VERBOSE)
def RunGit(command):
  """Run a git subcommand, returning its output.

  Args:
    command: A list of arguments to pass to git, e.g. ['grep', 'foo'].

  Returns:
    The standard output of the git invocation as text, with leading and
    trailing whitespace stripped.
  """
  command = ['git'] + command
  logging.info(' '.join(command))
  # On Windows, use shell=True to get PATH interpretation.
  shell = (os.name == 'nt')
  # universal_newlines=True makes the pipe yield text rather than bytes, so
  # callers can split the result on '\n' under Python 3 as well as Python 2.
  proc = subprocess.Popen(command, shell=shell, stdout=subprocess.PIPE,
                          universal_newlines=True)
  out = proc.communicate()[0].strip()
  return out
class DirectoryNotFoundException(Exception):
  """Base class to distinguish locally defined exceptions from standard ones."""
  def __init__(self, msg):
    # Forward the message to Exception so that str(), repr() and .args all
    # carry it.
    super(DirectoryNotFoundException, self).__init__(msg)
    # NOTE(review): also kept as an attribute in case callers read .msg
    # directly -- confirm against users of this class.
    self.msg = msg
def collapseAdjacentCStrings(string):
  """Merges runs of adjacent C string literals into a single literal.

  Handy for re-joining a string that was split across several source lines to
  stay within the 80-column limit.

  Args:
    string: The string to recombine, e.g. '"Foo"\n "bar"'

  Returns:
    The collapsed string, e.g. "Foobar" for an input of '"Foo"\n "bar"'
  """
  # Each pass joins only the first adjacent pair it finds (count=1), so keep
  # going until a pass makes no substitution.
  while True:
    string, joined = ADJACENT_C_STRING_REGEX.subn(r'\1', string, count=1)
    if not joined:
      return string
def logNonLiteralHistogram(filename, histogram):
  """Warns about a histogram name that is not a plain string literal.

  Known acceptable exceptions are filtered out and produce no warning.

  Args:
    filename: The filename for the file containing the histogram, e.g.
        'chrome/browser/memory_details.cc'
    histogram: The expression that evaluates to the name of the histogram, e.g.
        '"FakeHistogram" + variant'
  """
  # Two kinds of expression are not worth reporting:
  #   * histogram macros, which typically contain backslashes so that they can
  #     be formatted across lines;
  #   * histogram names that have been pulled out into C++ constants.
  is_macro = '\\' in histogram
  if is_macro or CONSTANT_REGEX.match(histogram):
    return

  # TODO(isherman): This is still a little noisy... needs further filtering.
  logging.warning('%s contains non-literal histogram name <%s>', filename,
                  histogram)
def readChromiumHistograms():
  """Searches the Chromium source for all histogram names.

  Also prints warnings for any invocations of the UMA_HISTOGRAM_* macros with
  names that might vary during a single run of the app.

  Returns:
    A set containing any found literal histogram names.
  """
  logging.info('Scanning Chromium source for histograms...')

  # Use git grep to find all invocations of the UMA_HISTOGRAM_* macros.
  # Examples:
  #   'path/to/foo.cc:420: UMA_HISTOGRAM_COUNTS_100("FooGroup.FooName",'
  #   'path/to/bar.cc:632: UMA_HISTOGRAM_ENUMERATION('
  # NOTE(review): 'gs' is presumably a locally configured git alias for a grep
  # variant -- confirm it exists in the environment running this script.
  locations = RunGit(['gs', 'UMA_HISTOGRAM']).split('\n')
  filenames = set(location.split(':')[0] for location in locations)
  # Empty git output splits to [''], which would otherwise make the loop
  # below try to open a file named ''.
  filenames.discard('')

  histograms = set()
  for filename in filenames:
    with open(filename, 'r') as f:
      contents = f.read()

    for histogram in set(HISTOGRAM_REGEX.findall(contents)):
      # The capture runs up to the delimiting comma/paren, so it can carry
      # trailing whitespace; drop it so a literal followed by spaces or a
      # newline is still recognised as a literal.
      histogram = collapseAdjacentCStrings(histogram).strip()

      # Must begin and end with a quotation mark.
      if not histogram or histogram[0] != '"' or histogram[-1] != '"':
        logNonLiteralHistogram(filename, histogram)
        continue

      # Must not include any quotation marks other than at the beginning or end.
      histogram_stripped = histogram.strip('"')
      if '"' in histogram_stripped:
        logNonLiteralHistogram(filename, histogram)
        continue

      histograms.add(histogram_stripped)

  return histograms
def readXmlHistograms(histograms_file_location):
  """Parses all histogram names from histograms.xml.

  Args:
    histograms_file_location: Location of the histograms XML file, handed to
        extract_histograms.ExtractHistograms.

  Returns:
    A set containing the parsed histogram names.
  """
  logging.info('Reading histograms from %s...' % histograms_file_location)
  parsed = extract_histograms.ExtractHistograms(histograms_file_location)
  names = extract_histograms.ExtractNames(parsed)
  return set(names)
def hashHistogramName(name):
  """Computes the hash of a histogram name.

  Args:
    name: The string to hash (a histogram name). Text is encoded as UTF-8
        before hashing; a byte string is hashed as is.

  Returns:
    Histogram hash as a string representing a hex number (with leading 0x).
  """
  # hashlib only accepts bytes; under Python 3 a str must be encoded first.
  # Under Python 2, str is already bytes and passes through untouched.
  if not isinstance(name, bytes):
    name = name.encode('utf-8')
  return '0x' + hashlib.md5(name).hexdigest()[:16]
def main():
  """Logs every histogram found in the source tree that no XML file defines.

  Parses the command-line options, changes into the root directory, scans the
  source for literal histogram names, removes the names defined in the
  histograms XML file (and in the extra XML file, when that file exists), and
  logs whatever remains together with its hash.
  """
  # Find default paths.
  default_root = path_util.GetInputFile('/')
  default_histograms_path = path_util.GetInputFile(
      'tools/metrics/histograms/histograms.xml')
  default_extra_histograms_path = path_util.GetInputFile(
      'tools/histograms/histograms.xml')

  # Parse command line options
  parser = optparse.OptionParser()
  parser.add_option(
      '--root-directory', dest='root_directory', default=default_root,
      help='scan within DIRECTORY for histograms [optional, defaults to "%s"]' %
           default_root,
      metavar='DIRECTORY')
  parser.add_option(
      '--histograms-file', dest='histograms_file_location',
      default=default_histograms_path,
      help='read histogram definitions from FILE (relative to --root-directory) '
           '[optional, defaults to "%s"]' % default_histograms_path,
      metavar='FILE')
  # '--exrta_histograms-file' is a historical misspelling of the option name;
  # it is still accepted so that existing invocations keep working.
  parser.add_option(
      '--extra-histograms-file', '--exrta_histograms-file',
      dest='extra_histograms_file_location',
      default=default_extra_histograms_path,
      help='read additional histogram definitions from FILE (relative to '
           '--root-directory) [optional, defaults to "%s"]' %
           default_extra_histograms_path,
      metavar='FILE')

  (options, args) = parser.parse_args()
  if args:
    # This script takes no positional arguments.
    parser.print_help()
    sys.exit(1)

  logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

  try:
    os.chdir(options.root_directory)
  except EnvironmentError as e:
    logging.error("Could not change to root directory: %s", e)
    # Nothing useful can be scanned from the wrong directory, so stop here.
    sys.exit(1)

  chromium_histograms = readChromiumHistograms()
  xml_histograms = readXmlHistograms(options.histograms_file_location)
  unmapped_histograms = chromium_histograms - xml_histograms

  # The extra XML file is optional: use it when present, warn otherwise.
  if os.path.isfile(options.extra_histograms_file_location):
    xml_histograms2 = readXmlHistograms(options.extra_histograms_file_location)
    unmapped_histograms -= xml_histograms2
  else:
    logging.warning('No such file: %s', options.extra_histograms_file_location)

  if unmapped_histograms:
    logging.info('Histograms in Chromium but not in XML files:')
    logging.info('-------------------------------------------------')
    for histogram in sorted(unmapped_histograms):
      logging.info(' %s - %s', histogram, hashHistogramName(histogram))
  else:
    logging.info('Success! No unmapped histograms found.')


if __name__ == '__main__':
  main()