source/cpplint.py,v

   1 head    1.1;
   2 access;
   3 symbols;
   4 locks; strict;
   5 comment @# @;
   6
   7
   8 1.1
   9 date    2012.02.11.02.30.51;    author cvsadmin;        state Exp;
  10 branches;
  11 next    ;
  12 deltatype       text;
  13 kopt    kv;
  14 permissions     666;
  15 commitid        13604f35d2db5718;
  16 filename        cpplint.py;
  17
  18
  19 desc
  20 @@
  21
  22
  23 1.1
  24 log
  25 @
  26 Committed on the Free edition of March Hare Software CVSNT Server.
  27 Upgrade to CVS Suite for more features and support:
  28 http://march-hare.com/cvsnt/
  29 @
  30 text
  31 @#!/usr/bin/python2.4
  32 #
  33 # Copyright (c) 2009 Google Inc. All rights reserved.
  34 #
  35 # Redistribution and use in source and binary forms, with or without
  36 # modification, are permitted provided that the following conditions are
  37 # met:
  38 #
  39 #    * Redistributions of source code must retain the above copyright
  40 # notice, this list of conditions and the following disclaimer.
  41 #    * Redistributions in binary form must reproduce the above
  42 # copyright notice, this list of conditions and the following disclaimer
  43 # in the documentation and/or other materials provided with the
  44 # distribution.
  45 #    * Neither the name of Google Inc. nor the names of its
  46 # contributors may be used to endorse or promote products derived from
  47 # this software without specific prior written permission.
  48 #
  49 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  50 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  51 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  52 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  53 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  54 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  55 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  56 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  57 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  58 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  59 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  60
  61 # Here are some issues that I've had people identify in my code during reviews,
  62 # that I think are possible to flag automatically in a lint tool.  If these were
  63 # caught by lint, it would save time both for myself and that of my reviewers.
  64 # Most likely, some of these are beyond the scope of the current lint framework,
  65 # but I think it is valuable to retain these wish-list items even if they cannot
  66 # be immediately implemented.
  67 #
  68 #  Suggestions
  69 #  -----------
  70 #  - Check for no 'explicit' for multi-arg ctor
  71 #  - Check for boolean assign RHS in parens
  72 #  - Check for ctor initializer-list colon position and spacing
  73 #  - Check that if there's a ctor, there should be a dtor
  74 #  - Check accessors that return non-pointer member variables are
  75 #    declared const
  76 #  - Check accessors that return non-const pointer member vars are
  77 #    *not* declared const
  78 #  - Check for using public includes for testing
  79 #  - Check for spaces between brackets in one-line inline method
  80 #  - Check for no assert()
  81 #  - Check for spaces surrounding operators
  82 #  - Check for 0 in pointer context (should be NULL)
  83 #  - Check for 0 in char context (should be '\0')
  84 #  - Check for camel-case method name conventions for methods
  85 #    that are not simple inline getters and setters
  86 #  - Check that base classes have virtual destructors
  87 #    put "  // namespace" after } that closes a namespace, with
  88 #    namespace's name after 'namespace' if it is named.
  89 #  - Do not indent namespace contents
  90 #  - Avoid inlining non-trivial constructors in header files
  91 #    include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
  92 #  - Check for old-school (void) cast for call-sites of functions
  93 #    ignored return value
  94 #  - Check gUnit usage of anonymous namespace
  95 #  - Check for class declaration order (typedefs, consts, enums,
  96 #    ctor(s?), dtor, friend declarations, methods, member vars)
  97 #
  98
  99 """Does google-lint on c++ files.
 100
 101 The goal of this script is to identify places in the code that *may*
 102 be in non-compliance with google style.  It does not attempt to fix
 103 up these problems -- the point is to educate.  It does also not
 104 attempt to find all problems, or to ensure that everything it does
 105 find is legitimately a problem.
 106
 107 In particular, we can get very confused by /* and // inside strings!
 108 We do a small hack, which is to ignore //'s with "'s after them on the
 109 same line, but it is far from perfect (in either direction).
 110 """
 111
 112 import codecs
 113 import getopt
 114 import math  # for log
 115 import os
 116 import re
 117 import sre_compile
 118 import string
 119 import sys
 120 import unicodedata
 121
 122
 123 _USAGE = """
 124 Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
 125                    [--counting=total|toplevel|detailed]
 126         <file> [file] ...
 127
 128   The style guidelines this tries to follow are those in
 129     http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
 130
 131   Every problem is given a confidence score from 1-5, with 5 meaning we are
 132   certain of the problem, and 1 meaning it could be a legitimate construct.
 133   This will miss some errors, and is not a substitute for a code review.
 134
 135   To suppress false-positive errors of a certain category, add a
 136   'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
 137   suppresses errors of all categories on that line.
 138
 139   The files passed in will be linted; at least one file must be provided.
 140   Linted extensions are .cc, .cpp, and .h.  Other file types will be ignored.
 141
 142   Flags:
 143
 144     output=vs7
 145       By default, the output is formatted to ease emacs parsing.  Visual Studio
 146       compatible output (vs7) may also be used.  Other formats are unsupported.
 147
 148     verbose=#
 149       Specify a number 0-5 to restrict errors to certain verbosity levels.
 150
 151     filter=-x,+y,...
 152       Specify a comma-separated list of category-filters to apply: only
 153       error messages whose category names pass the filters will be printed.
 154       (Category names are printed with the message and look like
 155       "[whitespace/indent]".)  Filters are evaluated left to right.
 156       "-FOO" and "FOO" means "do not print categories that start with FOO".
 157       "+FOO" means "do print categories that start with FOO".
 158
 159       Examples: --filter=-whitespace,+whitespace/braces
 160                 --filter=whitespace,runtime/printf,+runtime/printf_format
 161                 --filter=-,+build/include_what_you_use
 162
 163       To see a list of all the categories used in cpplint, pass no arg:
 164          --filter=
 165
 166     counting=total|toplevel|detailed
 167       The total number of errors found is always printed. If
 168       'toplevel' is provided, then the count of errors in each of
 169       the top-level categories like 'build' and 'whitespace' will
 170       also be printed. If 'detailed' is provided, then a count
 171       is provided for each category like 'build/class'.
 172 """
 173
 174 # We categorize each error message we print.  Here are the categories.
 175 # We want an explicit list so we can list them all in cpplint --filter=.
 176 # If you add a new error message with a new category, add it to the list
 177 # here!  cpplint_unittest.py should tell you if you forget to do this.
 178 # \ used for clearer layout -- pylint: disable-msg=C6013
 179 _ERROR_CATEGORIES = [
 180   'build/class',
 181   'build/deprecated',
 182   'build/endif_comment',
 183   'build/forward_decl',
 184   'build/header_guard',
 185   'build/include',
 186   'build/include_alpha',
 187   'build/include_order',
 188   'build/include_what_you_use',
 189   'build/namespaces',
 190   'build/printf_format',
 191   'build/storage_class',
 192   'legal/copyright',
 193   'readability/braces',
 194   'readability/casting',
 195   'readability/check',
 196   'readability/constructors',
 197   'readability/fn_size',
 198   'readability/function',
 199   'readability/multiline_comment',
 200   'readability/multiline_string',
 201   'readability/nolint',
 202   'readability/streams',
 203   'readability/todo',
 204   'readability/utf8',
 205   'runtime/arrays',
 206   'runtime/casting',
 207   'runtime/explicit',
 208   'runtime/int',
 209   'runtime/init',
 210   'runtime/invalid_increment',
 211   'runtime/member_string_references',
 212   'runtime/memset',
 213   'runtime/operator',
 214   'runtime/printf',
 215   'runtime/printf_format',
 216   'runtime/references',
 217   'runtime/rtti',
 218   'runtime/sizeof',
 219   'runtime/string',
 220   'runtime/threadsafe_fn',
 221   'runtime/virtual',
 222   'whitespace/blank_line',
 223   'whitespace/braces',
 224   'whitespace/comma',
 225   'whitespace/comments',
 226   'whitespace/end_of_line',
 227   'whitespace/ending_newline',
 228   'whitespace/indent',
 229   'whitespace/labels',
 230   'whitespace/line_length',
 231   'whitespace/newline',
 232   'whitespace/operators',
 233   'whitespace/parens',
 234   'whitespace/semicolon',
 235   'whitespace/tab',
 236   'whitespace/todo'
 237   ]
 238
  239 # The default state of the category filter. This is overridden by the --filter=
 240 # flag. By default all errors are on, so only add here categories that should be
 241 # off by default (i.e., categories that must be enabled by the --filter= flags).
 242 # All entries here should start with a '-' or '+', as in the --filter= flag.
 243 _DEFAULT_FILTERS = [ '-build/include_alpha' ]
 244
 245 # We used to check for high-bit characters, but after much discussion we
 246 # decided those were OK, as long as they were in UTF-8 and didn't represent
  247 # hard-coded international strings, which belong in a separate i18n file.
 248
 249 # Headers that we consider STL headers.
 250 _STL_HEADERS = frozenset([
 251     'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
 252     'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
 253     'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'new',
 254     'pair.h', 'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
 255     'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
 256     'utility', 'vector', 'vector.h',
 257     ])
 258
 259
 260 # Non-STL C++ system headers.
 261 _CPP_HEADERS = frozenset([
 262     'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
 263     'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
 264     'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
 265     'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
 266     'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
 267     'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
 268     'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
 269     'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
 270     'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
 271     'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
 272     'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
 273     'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
 274     'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
 275     ])
 276
 277
 278 # Assertion macros.  These are defined in base/logging.h and
 279 # testing/base/gunit.h.  Note that the _M versions need to come first
 280 # for substring matching to work.
 281 _CHECK_MACROS = [
 282     'DCHECK', 'CHECK',
 283     'EXPECT_TRUE_M', 'EXPECT_TRUE',
 284     'ASSERT_TRUE_M', 'ASSERT_TRUE',
 285     'EXPECT_FALSE_M', 'EXPECT_FALSE',
 286     'ASSERT_FALSE_M', 'ASSERT_FALSE',
 287     ]
 288
 289 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
 290 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
 291
 292 for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
 293                         ('>=', 'GE'), ('>', 'GT'),
 294                         ('<=', 'LE'), ('<', 'LT')]:
 295   _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
 296   _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
 297   _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
 298   _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
 299   _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
 300   _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
 301
 302 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
 303                             ('>=', 'LT'), ('>', 'LE'),
 304                             ('<=', 'GT'), ('<', 'GE')]:
 305   _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
 306   _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
 307   _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
 308   _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
 309
 310
 311 # These constants define types of headers for use with
 312 # _IncludeState.CheckNextIncludeOrder().
 313 _C_SYS_HEADER = 1
 314 _CPP_SYS_HEADER = 2
 315 _LIKELY_MY_HEADER = 3
 316 _POSSIBLE_MY_HEADER = 4
 317 _OTHER_HEADER = 5
 318
 319
 320 _regexp_compile_cache = {}
 321
 322 # Finds occurrences of NOLINT or NOLINT(...).
 323 _RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')
 324
 325 # {str, set(int)}: a map from error categories to sets of linenumbers
 326 # on which those errors are expected and should be suppressed.
 327 _error_suppressions = {}
 328
 329 def ParseNolintSuppressions(filename, raw_line, linenum, error):
 330   """Updates the global list of error-suppressions.
 331
 332   Parses any NOLINT comments on the current line, updating the global
 333   error_suppressions store.  Reports an error if the NOLINT comment
 334   was malformed.
 335
 336   Args:
 337     filename: str, the name of the input file.
 338     raw_line: str, the line of input text, with comments.
 339     linenum: int, the number of the current line.
 340     error: function, an error handler.
 341   """
 342   # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
 343   m = _RE_SUPPRESSION.search(raw_line)
 344   if m:
 345     category = m.group(1)
 346     if category in (None, '(*)'):  # => "suppress all"
 347       _error_suppressions.setdefault(None, set()).add(linenum)
 348     else:
 349       if category.startswith('(') and category.endswith(')'):
 350         category = category[1:-1]
 351         if category in _ERROR_CATEGORIES:
 352           _error_suppressions.setdefault(category, set()).add(linenum)
 353         else:
 354           error(filename, linenum, 'readability/nolint', 5,
 355             'Unknown NOLINT error category: %s' % category)
 356
 357
 358 def ResetNolintSuppressions():
 359   "Resets the set of NOLINT suppressions to empty."
 360   _error_suppressions.clear()
 361
 362
 363 def IsErrorSuppressedByNolint(category, linenum):
 364   """Returns true if the specified error category is suppressed on this line.
 365
 366   Consults the global error_suppressions map populated by
 367   ParseNolintSuppressions/ResetNolintSuppressions.
 368
 369   Args:
 370     category: str, the category of the error.
 371     linenum: int, the current line number.
 372   Returns:
 373     bool, True iff the error should be suppressed due to a NOLINT comment.
 374   """
 375   return (linenum in _error_suppressions.get(category, set()) or
 376           linenum in _error_suppressions.get(None, set()))
 377
 378 def Match(pattern, s):
 379   """Matches the string with the pattern, caching the compiled regexp."""
 380   # The regexp compilation caching is inlined in both Match and Search for
 381   # performance reasons; factoring it out into a separate function turns out
 382   # to be noticeably expensive.
 383   if not pattern in _regexp_compile_cache:
 384     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
 385   return _regexp_compile_cache[pattern].match(s)
 386
 387
 388 def Search(pattern, s):
 389   """Searches the string for the pattern, caching the compiled regexp."""
 390   if not pattern in _regexp_compile_cache:
 391     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
 392   return _regexp_compile_cache[pattern].search(s)
 393
 394
 395 class _IncludeState(dict):
 396   """Tracks line numbers for includes, and the order in which includes appear.
 397
 398   As a dict, an _IncludeState object serves as a mapping between include
 399   filename and line number on which that file was included.
 400
 401   Call CheckNextIncludeOrder() once for each header in the file, passing
 402   in the type constants defined above. Calls in an illegal order will
 403   raise an _IncludeError with an appropriate error message.
 404
 405   """
 406   # self._section will move monotonically through this set. If it ever
 407   # needs to move backwards, CheckNextIncludeOrder will raise an error.
 408   _INITIAL_SECTION = 0
 409   _MY_H_SECTION = 1
 410   _C_SECTION = 2
 411   _CPP_SECTION = 3
 412   _OTHER_H_SECTION = 4
 413
 414   _TYPE_NAMES = {
 415       _C_SYS_HEADER: 'C system header',
 416       _CPP_SYS_HEADER: 'C++ system header',
 417       _LIKELY_MY_HEADER: 'header this file implements',
 418       _POSSIBLE_MY_HEADER: 'header this file may implement',
 419       _OTHER_HEADER: 'other header',
 420       }
 421   _SECTION_NAMES = {
 422       _INITIAL_SECTION: "... nothing. (This can't be an error.)",
 423       _MY_H_SECTION: 'a header this file implements',
 424       _C_SECTION: 'C system header',
 425       _CPP_SECTION: 'C++ system header',
 426       _OTHER_H_SECTION: 'other header',
 427       }
 428
 429   def __init__(self):
 430     dict.__init__(self)
 431     # The name of the current section.
 432     self._section = self._INITIAL_SECTION
 433     # The path of last found header.
 434     self._last_header = ''
 435
 436   def CanonicalizeAlphabeticalOrder(self, header_path):
 437     """Returns a path canonicalized for alphabetical comparisson.
 438
 439     - replaces "-" with "_" so they both cmp the same.
 440     - removes '-inl' since we don't require them to be after the main header.
 441     - lowercase everything, just in case.
 442
 443     Args:
 444       header_path: Path to be canonicalized.
 445
 446     Returns:
 447       Canonicalized path.
 448     """
 449     return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
 450
 451   def IsInAlphabeticalOrder(self, header_path):
 452     """Check if a header is in alphabetical order with the previous header.
 453
 454     Args:
 455       header_path: Header to be checked.
 456
 457     Returns:
 458       Returns true if the header is in alphabetical order.
 459     """
 460     canonical_header = self.CanonicalizeAlphabeticalOrder(header_path)
 461     if self._last_header > canonical_header:
 462       return False
 463     self._last_header = canonical_header
 464     return True
 465
 466   def CheckNextIncludeOrder(self, header_type):
 467     """Returns a non-empty error message if the next header is out of order.
 468
 469     This function also updates the internal state to be ready to check
 470     the next include.
 471
 472     Args:
 473       header_type: One of the _XXX_HEADER constants defined above.
 474
 475     Returns:
 476       The empty string if the header is in the right order, or an
 477       error message describing what's wrong.
 478
 479     """
 480     error_message = ('Found %s after %s' %
 481                      (self._TYPE_NAMES[header_type],
 482                       self._SECTION_NAMES[self._section]))
 483
 484     last_section = self._section
 485
 486     if header_type == _C_SYS_HEADER:
 487       if self._section <= self._C_SECTION:
 488         self._section = self._C_SECTION
 489       else:
 490         self._last_header = ''
 491         return error_message
 492     elif header_type == _CPP_SYS_HEADER:
 493       if self._section <= self._CPP_SECTION:
 494         self._section = self._CPP_SECTION
 495       else:
 496         self._last_header = ''
 497         return error_message
 498     elif header_type == _LIKELY_MY_HEADER:
 499       if self._section <= self._MY_H_SECTION:
 500         self._section = self._MY_H_SECTION
 501       else:
 502         self._section = self._OTHER_H_SECTION
 503     elif header_type == _POSSIBLE_MY_HEADER:
 504       if self._section <= self._MY_H_SECTION:
 505         self._section = self._MY_H_SECTION
 506       else:
 507         # This will always be the fallback because we're not sure
 508         # enough that the header is associated with this file.
 509         self._section = self._OTHER_H_SECTION
 510     else:
 511       assert header_type == _OTHER_HEADER
 512       self._section = self._OTHER_H_SECTION
 513
 514     if last_section != self._section:
 515       self._last_header = ''
 516
 517     return ''
 518
 519
 520 class _CppLintState(object):
 521   """Maintains module-wide state.."""
 522
 523   def __init__(self):
 524     self.verbose_level = 1  # global setting.
 525     self.error_count = 0    # global count of reported errors
 526     # filters to apply when emitting error messages
 527     self.filters = _DEFAULT_FILTERS[:]
 528     self.counting = 'total'  # In what way are we counting errors?
 529     self.errors_by_category = {}  # string to int dict storing error counts
 530
 531     # output format:
 532     # "emacs" - format that emacs can parse (default)
 533     # "vs7" - format that Microsoft Visual Studio 7 can parse
 534     self.output_format = 'emacs'
 535
 536   def SetOutputFormat(self, output_format):
 537     """Sets the output format for errors."""
 538     self.output_format = output_format
 539
 540   def SetVerboseLevel(self, level):
 541     """Sets the module's verbosity, and returns the previous setting."""
 542     last_verbose_level = self.verbose_level
 543     self.verbose_level = level
 544     return last_verbose_level
 545
 546   def SetCountingStyle(self, counting_style):
 547     """Sets the module's counting options."""
 548     self.counting = counting_style
 549
 550   def SetFilters(self, filters):
 551     """Sets the error-message filters.
 552
 553     These filters are applied when deciding whether to emit a given
 554     error message.
 555
 556     Args:
 557       filters: A string of comma-separated filters (eg "+whitespace/indent").
 558                Each filter should start with + or -; else we die.
 559
 560     Raises:
 561       ValueError: The comma-separated filters did not all start with '+' or '-'.
 562                   E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
 563     """
 564     # Default filters always have less priority than the flag ones.
 565     self.filters = _DEFAULT_FILTERS[:]
 566     for filt in filters.split(','):
 567       clean_filt = filt.strip()
 568       if clean_filt:
 569         self.filters.append(clean_filt)
 570     for filt in self.filters:
 571       if not (filt.startswith('+') or filt.startswith('-')):
 572         raise ValueError('Every filter in --filters must start with + or -'
 573                          ' (%s does not)' % filt)
 574
 575   def ResetErrorCounts(self):
 576     """Sets the module's error statistic back to zero."""
 577     self.error_count = 0
 578     self.errors_by_category = {}
 579
 580   def IncrementErrorCount(self, category):
 581     """Bumps the module's error statistic."""
 582     self.error_count += 1
 583     if self.counting in ('toplevel', 'detailed'):
 584       if self.counting != 'detailed':
 585         category = category.split('/')[0]
 586       if category not in self.errors_by_category:
 587         self.errors_by_category[category] = 0
 588       self.errors_by_category[category] += 1
 589
 590   def PrintErrorCounts(self):
 591     """Print a summary of errors by category, and the total."""
 592     for category, count in self.errors_by_category.iteritems():
 593       sys.stderr.write('Category \'%s\' errors found: %d\n' %
 594                        (category, count))
 595     sys.stderr.write('Total errors found: %d\n' % self.error_count)
 596
 597 _cpplint_state = _CppLintState()
 598
 599
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format attribute of the shared _cpplint_state object.
  """
  return _cpplint_state.output_format
 603
 604
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The value handed to _cpplint_state.SetOutputFormat().
  """
  _cpplint_state.SetOutputFormat(output_format)
 608
 609
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level attribute of the shared _cpplint_state object.
  """
  return _cpplint_state.verbose_level
 613
 614
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity, handed to _cpplint_state.SetVerboseLevel().

  Returns:
    Whatever _cpplint_state.SetVerboseLevel() returns.
  """
  return _cpplint_state.SetVerboseLevel(level)
 618
 619
def _SetCountingStyle(level):
  """Sets the module's counting options.

  Args:
    level: The counting style handed to _cpplint_state.SetCountingStyle().
  """
  _cpplint_state.SetCountingStyle(level)
 623
 624
def _Filters():
  """Returns the module's list of output filters, as a list.

  The list returned is the filters attribute of the shared _cpplint_state
  object itself, not a copy.
  """
  return _cpplint_state.filters
 628
 629
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.  The work is delegated to _cpplint_state.SetFilters().

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  _cpplint_state.SetFilters(filters)
 641
 642
 643 class _FunctionState(object):
 644   """Tracks current function name and the number of lines in its body."""
 645
 646   _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
 647   _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
 648
 649   def __init__(self):
 650     self.in_a_function = False
 651     self.lines_in_function = 0
 652     self.current_function = ''
 653
 654   def Begin(self, function_name):
 655     """Start analyzing function body.
 656
 657     Args:
 658       function_name: The name of the function being tracked.
 659     """
 660     self.in_a_function = True
 661     self.lines_in_function = 0
 662     self.current_function = function_name
 663
 664   def Count(self):
 665     """Count line in current function body."""
 666     if self.in_a_function:
 667       self.lines_in_function += 1
 668
 669   def Check(self, error, filename, linenum):
 670     """Report if too many lines in function body.
 671
 672     Args:
 673       error: The function to call with any errors found.
 674       filename: The name of the current file.
 675       linenum: The number of the line to check.
 676     """
 677     if Match(r'T(EST|est)', self.current_function):
 678       base_trigger = self._TEST_TRIGGER
 679     else:
 680       base_trigger = self._NORMAL_TRIGGER
 681     trigger = base_trigger * 2**_VerboseLevel()
 682
 683     if self.lines_in_function > trigger:
 684       error_level = int(math.log(self.lines_in_function / base_trigger, 2))
 685       # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
 686       if error_level > 5:
 687         error_level = 5
 688       error(filename, linenum, 'readability/fn_size', error_level,
 689             'Small and focused functions are preferred:'
 690             ' %s has %d non-comment lines'
 691             ' (error triggered by exceeding %d lines).'  % (
 692                 self.current_function, self.lines_in_function, trigger))
 693
 694   def End(self):
 695     """Stop analizing function body."""
 696     self.in_a_function = False
 697
 698
 699 class _IncludeError(Exception):
 700   """Indicates a problem with the include order in a file."""
 701   pass
 702
 703
 704 class FileInfo:
 705   """Provides utility functions for filenames.
 706
 707   FileInfo provides easy access to the components of a file's path
 708   relative to the project root.
 709   """
 710
 711   def __init__(self, filename):
 712     self._filename = filename
 713
 714   def FullName(self):
 715     """Make Windows paths like Unix."""
 716     return os.path.abspath(self._filename).replace('\\', '/')
 717
 718   def RepositoryName(self):
 719     """FullName after removing the local path to the repository.
 720
 721     If we have a real absolute path name here we can try to do something smart:
 722     detecting the root of the checkout and truncating /path/to/checkout from
 723     the name so that we get header guards that don't include things like
 724     "C:\Documents and Settings\..." or "/home/username/..." in them and thus
 725     people on different computers who have checked the source out to different
 726     locations won't see bogus errors.
 727     """
 728     fullname = self.FullName()
 729
 730     if os.path.exists(fullname):
 731       project_dir = os.path.dirname(fullname)
 732
 733       if os.path.exists(os.path.join(project_dir, ".svn")):
 734         # If there's a .svn file in the current directory, we recursively look
 735         # up the directory tree for the top of the SVN checkout
 736         root_dir = project_dir
 737         one_up_dir = os.path.dirname(root_dir)
 738         while os.path.exists(os.path.join(one_up_dir, ".svn")):
 739           root_dir = os.path.dirname(root_dir)
 740           one_up_dir = os.path.dirname(one_up_dir)
 741
 742         prefix = os.path.commonprefix([root_dir, project_dir])
 743         return fullname[len(prefix) + 1:]
 744
 745       # Not SVN? Try to find a git or hg top level directory by searching up
 746       # from the current path.
 747       root_dir = os.path.dirname(fullname)
 748       while (root_dir != os.path.dirname(root_dir) and
 749              not os.path.exists(os.path.join(root_dir, ".git")) and
 750              not os.path.exists(os.path.join(root_dir, ".hg"))):
 751         root_dir = os.path.dirname(root_dir)
 752
 753       if (os.path.exists(os.path.join(root_dir, ".git")) or
 754           os.path.exists(os.path.join(root_dir, ".hg"))):
 755         prefix = os.path.commonprefix([root_dir, project_dir])
 756         return fullname[len(prefix) + 1:]
 757
 758     # Don't know what to do; header guard warnings may be wrong...
 759     return fullname
 760
 761   def Split(self):
 762     """Splits the file into the directory, basename, and extension.
 763
 764     For 'chrome/browser/browser.cc', Split() would
 765     return ('chrome/browser', 'browser', '.cc')
 766
 767     Returns:
 768       A tuple of (directory, basename, extension).
 769     """
 770
 771     googlename = self.RepositoryName()
 772     project, rest = os.path.split(googlename)
 773     return (project,) + os.path.splitext(rest)
 774
 775   def BaseName(self):
 776     """File base name - text after the final slash, before the final period."""
 777     return self.Split()[1]
 778
 779   def Extension(self):
 780     """File extension - text following the final period."""
 781     return self.Split()[2]
 782
 783   def NoExtension(self):
 784     """File has no source file extension."""
 785     return '/'.join(self.Split()[0:2])
 786
 787   def IsSource(self):
 788     """File has a source file extension."""
 789     return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
 790
 791
 792 def _ShouldPrintError(category, confidence, linenum):
 793   """Returns true iff confidence >= verbose, category passes
 794   filter and is not NOLINT-suppressed."""
 795
 796   # There are three ways we might decide not to print an error message:
 797   # a "NOLINT(category)" comment appears in the source,
 798   # the verbosity level isn't high enough, or the filters filter it out.
 799   if IsErrorSuppressedByNolint(category, linenum):
 800     return False
 801   if confidence < _cpplint_state.verbose_level:
 802     return False
 803
 804   is_filtered = False
 805   for one_filter in _Filters():
 806     if one_filter.startswith('-'):
 807       if category.startswith(one_filter[1:]):
 808         is_filtered = True
 809     elif one_filter.startswith('+'):
 810       if category.startswith(one_filter[1:]):
 811         is_filtered = False
 812     else:
 813       assert False  # should have been checked for in SetFilter.
 814   if is_filtered:
 815     return False
 816
 817   return True
 818
 819
 820 def Error(filename, linenum, category, confidence, message):
 821   """Logs the fact we've found a lint error.
 822
 823   We log where the error was found, and also our confidence in the error,
 824   that is, how certain we are this is a legitimate style regression, and
 825   not a misidentification or a use that's sometimes justified.
 826
 827   False positives can be suppressed by the use of
 828   "cpplint(category)"  comments on the offending line.  These are
 829   parsed into _error_suppressions.
 830
 831   Args:
 832     filename: The name of the file containing the error.
 833     linenum: The number of the line containing the error.
 834     category: A string used to describe the "category" this bug
 835       falls under: "whitespace", say, or "runtime".  Categories
 836       may have a hierarchy separated by slashes: "whitespace/indent".
 837     confidence: A number from 1-5 representing a confidence score for
 838       the error, with 5 meaning that we are certain of the problem,
 839       and 1 meaning that it could be a legitimate construct.
 840     message: The error message.
 841   """
 842   if _ShouldPrintError(category, confidence, linenum):
 843     _cpplint_state.IncrementErrorCount(category)
 844     if _cpplint_state.output_format == 'vs7':
 845       sys.stderr.write('%s(%s):  %s  [%s] [%d]\n' % (
 846           filename, linenum, message, category, confidence))
 847     else:
 848       sys.stderr.write('%s:%s:  %s  [%s] [%d]\n' % (
 849           filename, linenum, message, category, confidence))
 850
 851
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# This RE is a little bit more complicated than one might expect, because we
# also have to take care of removing the surrounding spaces so that comments
# inside statements are handled better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-character
# on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
 872
 873
 874 def IsCppString(line):
 875   """Does line terminate so, that the next symbol is in string constant.
 876
 877   This function does not consider single-line nor multi-line comments.
 878
 879   Args:
 880     line: is a partial line of code starting from the 0..n.
 881
 882   Returns:
 883     True, if next character appended to 'line' is inside a
 884     string constant.
 885   """
 886
 887   line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
 888   return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
 889
 890
 891 def FindNextMultiLineCommentStart(lines, lineix):
 892   """Find the beginning marker for a multiline comment."""
 893   while lineix < len(lines):
 894     if lines[lineix].strip().startswith('/*'):
 895       # Only return this marker if the comment goes beyond this line
 896       if lines[lineix].strip().find('*/', 2) < 0:
 897         return lineix
 898     lineix += 1
 899   return len(lines)
 900
 901
 902 def FindNextMultiLineCommentEnd(lines, lineix):
 903   """We are inside a comment, find the end marker."""
 904   while lineix < len(lines):
 905     if lines[lineix].strip().endswith('*/'):
 906       return lineix
 907     lineix += 1
 908   return len(lines)
 909
 910
 911 def RemoveMultiLineCommentsFromRange(lines, begin, end):
 912   """Clears a range of lines for multi-line comments."""
 913   # Having // dummy comments makes the lines non-empty, so we will not get
 914   # unnecessary blank line warnings later in the code.
 915   for i in range(begin, end):
 916     lines[i] = '// dummy'
 917
 918
 919 def RemoveMultiLineComments(filename, lines, error):
 920   """Removes multiline (c-style) comments from lines."""
 921   lineix = 0
 922   while lineix < len(lines):
 923     lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
 924     if lineix_begin >= len(lines):
 925       return
 926     lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
 927     if lineix_end >= len(lines):
 928       error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
 929             'Could not find end of multi-line comment')
 930       return
 931     RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
 932     lineix = lineix_end + 1
 933
 934
 935 def CleanseComments(line):
 936   """Removes //-comments and single-line C-style /* */ comments.
 937
 938   Args:
 939     line: A line of C++ source.
 940
 941   Returns:
 942     The line with single-line comments removed.
 943   """
 944   commentpos = line.find('//')
 945   if commentpos != -1 and not IsCppString(line[:commentpos]):
 946     line = line[:commentpos]
 947   # get rid of /* ... */
 948   return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
 949
 950
 951 class CleansedLines(object):
 952   """Holds 3 copies of all lines with different preprocessing applied to them.
 953
 954   1) elided member contains lines without strings and comments,
 955   2) lines member contains lines without comments, and
 956   3) raw member contains all the lines without processing.
 957   All these three members are of <type 'list'>, and of the same length.
 958   """
 959
 960   def __init__(self, lines):
 961     self.elided = []
 962     self.lines = []
 963     self.raw_lines = lines
 964     self.num_lines = len(lines)
 965     for linenum in range(len(lines)):
 966       self.lines.append(CleanseComments(lines[linenum]))
 967       elided = self._CollapseStrings(lines[linenum])
 968       self.elided.append(CleanseComments(elided))
 969
 970   def NumLines(self):
 971     """Returns the number of lines represented."""
 972     return self.num_lines
 973
 974   @@staticmethod
 975   def _CollapseStrings(elided):
 976     """Collapses strings and chars on a line to simple "" or '' blocks.
 977
 978     We nix strings first so we're not fooled by text like '"http://"'
 979
 980     Args:
 981       elided: The line being processed.
 982
 983     Returns:
 984       The line with collapsed strings.
 985     """
 986     if not _RE_PATTERN_INCLUDE.match(elided):
 987       # Remove escaped characters first to make quote/single quote collapsing
 988       # basic.  Things that look like escaped characters shouldn't occur
 989       # outside of strings and chars.
 990       elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
 991       elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
 992       elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
 993     return elided
 994
 995
 996 def CloseExpression(clean_lines, linenum, pos):
 997   """If input points to ( or { or [, finds the position that closes it.
 998
 999   If lines[linenum][pos] points to a '(' or '{' or '[', finds the the
1000   linenum/pos that correspond to the closing of the expression.
1001
1002   Args:
1003     clean_lines: A CleansedLines instance containing the file.
1004     linenum: The number of the line to check.
1005     pos: A position on the line.
1006
1007   Returns:
1008     A tuple (line, linenum, pos) pointer *past* the closing brace, or
1009     (line, len(lines), -1) if we never find a close.  Note we ignore
1010     strings and comments when matching; and the line we return is the
1011     'cleansed' line at linenum.
1012   """
1013
1014   line = clean_lines.elided[linenum]
1015   startchar = line[pos]
1016   if startchar not in '({[':
1017     return (line, clean_lines.NumLines(), -1)
1018   if startchar == '(': endchar = ')'
1019   if startchar == '[': endchar = ']'
1020   if startchar == '{': endchar = '}'
1021
1022   num_open = line.count(startchar) - line.count(endchar)
1023   while linenum < clean_lines.NumLines() and num_open > 0:
1024     linenum += 1
1025     line = clean_lines.elided[linenum]
1026     num_open += line.count(startchar) - line.count(endchar)
1027   # OK, now find the endchar that actually got us back to even
1028   endpos = len(line)
1029   while num_open >= 0:
1030     endpos = line.rfind(')', 0, endpos)
1031     num_open -= 1                 # chopped off another )
1032   return (line, linenum, endpos + 1)
1033
1034
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file; the
      element at index 0 is a dummy line and is not inspected.
    error: The function to call with any errors found.
  """
  # We'll say it should occur by line 10.  Index 0 is the dummy line at the
  # front, so the lines to inspect are lines[1:11]; slicing also copes with
  # files shorter than that.
  for text in lines[1:11]:
    if re.search(r'Copyright', text, re.I):
      return
  # No copyright line was found.
  error(filename, 0, 'legal/copyright', 5,
        'No copyright message found.  '
        'You should have a line: "Copyright [year] <Copyright Owner>"')
1046
1047
def GetHeaderGuardCPPVariable(filename):
  """Derives the expected header-guard macro name for a header file.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file: the file's repository name, upper-cased, with every '-',
    '.', '/' and whitespace character replaced by '_', plus a trailing '_'.
  """
  # When cpplint is invoked from Emacs's flymake the file carries a
  # '_flymake' suffix; restore the original file name first.
  original_name = re.sub(r'_flymake\.h$', '.h', filename)

  repository_name = FileInfo(original_name).RepositoryName()
  guard_stem = re.sub(r'[-./\s]', '_', repository_name)
  return guard_stem.upper() + '_'
1066
1067
def CheckForHeaderGuard(filename, lines, error):
  """Verifies the #ifndef/#define/#endif header guard of a header file.

  Reports 'build/header_guard' when no matching #ifndef/#define pair is
  found, when the guard name differs from the one derived from the file's
  path, or when the last #endif line is not exactly "#endif  // GUARD".

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  expected_guard = GetHeaderGuardCPPVariable(filename)

  ifndef = None
  ifndef_linenum = 0
  define = None
  endif = None
  endif_linenum = 0
  for linenum, line in enumerate(lines):
    tokens = line.split()
    if len(tokens) >= 2:
      directive, argument = tokens[0], tokens[1]
      # Only the first #ifndef and the first #define are remembered.
      if directive == '#ifndef' and not ifndef:
        ifndef = argument
        ifndef_linenum = linenum
      if directive == '#define' and not define:
        define = argument
    # The last #endif wins; the whole line is kept for the comment check.
    if line.startswith('#endif'):
      endif = line
      endif_linenum = linenum

  if not (ifndef and define and ifndef == define):
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          expected_guard)
    return

  # The guard should be PATH_FILE_H_, but PATH_FILE_H__ is also tolerated
  # for backward compatibility: it is reported with error level 0.
  legacy_guard = expected_guard + '_'

  if ifndef != expected_guard:
    if ifndef == legacy_guard:
      error_level = 0
    else:
      error_level = 5

    ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
                            error)
    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' %
          expected_guard)

  if endif != ('#endif  // %s' % expected_guard):
    if endif == ('#endif  // %s' % legacy_guard):
      error_level = 0
    else:
      error_level = 5

    ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
                            error)
    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif  // %s"' % expected_guard)
1129
1130
def CheckForUnicodeReplacementCharacters(filename, lines, error):
  """Reports every line that contains a Unicode replacement character.

  Such a character (U+FFFD) indicates that either the file contained invalid
  UTF-8 (likely) or literal replacement characters (which it shouldn't).
  Note that line numbering can be thrown off if the invalid UTF-8 occurred
  adjacent to a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  for linenum, line in enumerate(lines):
    if u'\ufffd' not in line:
      continue
    error(filename, linenum, 'readability/utf8', 5,
          'Line contains invalid UTF-8 (or Unicode replacement character).')
1148
1149
def CheckForNewlineAtEOF(filename, lines, error):
  """Reports a file that does not end with a newline character.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # The array lines() was created by adding two newlines to the original
  # file (go figure), then splitting on \n.  The file therefore ends in \n
  # exactly when the last-but-two element exists and is empty.
  num_lines = len(lines)
  ends_with_newline = num_lines >= 3 and not lines[-2]
  if not ends_with_newline:
    error(filename, num_lines - 2, 'whitespace/ending_newline', 5,
          'Could not find a newline character at the end of the file.')
1166
1167
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Reports /* ... */ comments or "..." strings that run past one line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other.  Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line. Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary. We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Drop all \\ (escaped backslashes) up front.  They are harmless, and the
  # second (escaped) backslash could otherwise be mistaken for the start of
  # a \" sequence below.
  text = clean_lines.elided[linenum].replace('\\\\', '')

  comment_opens = text.count('/*')
  comment_closes = text.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings.  '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped double quotes means a string is left open.
  unescaped_quotes = text.count('"') - text.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found.  This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings.  They\'re '
          'ugly and unnecessary, and you should use concatenation instead".')
1204
1205
# Pairs of (call prefix to look for, call prefix to suggest instead).
# CheckPosixThreading searches each line for the first element and, when it
# is found, recommends the second ("_r") variant for improved thread safety.
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )
1221
1222
def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Flags calls to thread-unsafe POSIX functions.

  Much code has been originally written without consideration of
  multi-threading. Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added. These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for unsafe_call, safe_call in threading_list:
    found_at = line.find(unsafe_call)
    if found_at < 0:
      continue
    if found_at > 0:
      # Skip matches that are merely the tail of a longer identifier or a
      # member access (preceded by a word character, '.' or '>').
      preceding = line[found_at - 1]
      if preceding.isalnum() or preceding in ('_', '.', '>'):
        continue
    error(filename, linenum, 'runtime/threadsafe_fn', 2,
          'Consider using ' + safe_call +
          '...) instead of ' + unsafe_call +
          '...) for improved thread safety.')
1248
1249
# Matches an invalid increment or decrement statement such as "*count++;" or
# "*count--;" at the start of a line (after optional whitespace).  Such a
# statement moves the pointer instead of changing the value it points to.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')
1254
1255
def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Flags statements of the form *count++ (or *count--).

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if _RE_PATTERN_INVALID_INCREMENT.match(statement) is None:
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1276
1277
1278 class _ClassInfo(object):
1279   """Stores information about a class."""
1280
1281   def __init__(self, name, linenum):
1282     self.name = name
1283     self.linenum = linenum
1284     self.seen_open_brace = False
1285     self.is_derived = False
1286     self.virtual_method_linenumber = None
1287     self.has_virtual_destructor = False
1288     self.brace_depth = 0
1289
1290
1291 class _ClassState(object):
1292   """Holds the current state of the parse relating to class declarations.
1293
1294   It maintains a stack of _ClassInfos representing the parser's guess
1295   as to the current nesting of class declarations. The innermost class
1296   is at the top (back) of the stack. Typically, the stack will either
1297   be empty or have exactly one entry.
1298   """
1299
1300   def __init__(self):
1301     self.classinfo_stack = []
1302
1303   def CheckFinished(self, filename, error):
1304     """Checks that all classes have been completely parsed.
1305
1306     Call this when all lines in a file have been processed.
1307     Args:
1308       filename: The name of the current file.
1309       error: The function to call with any errors found.
1310     """
1311     if self.classinfo_stack:
1312       # Note: This test can result in false positives if #ifdef constructs
1313       # get in the way of brace matching. See the testBuildClass test in
1314       # cpplint_unittest.py for an example of this.
1315       error(filename, self.classinfo_stack[0].linenum, 'build/class', 5,
1316             'Failed to find complete declaration of class %s' %
1317             self.classinfo_stack[0].name)
1318
1319
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  class_state, error):
  """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  Complain about several constructs which gcc-2 accepts, but which are
  not standard C++.  Warning about these in lint is one way to ease the
  transition to new compilers.
  - put storage class first (e.g. "static const" instead of "const static").
  - "%lld" instead of "%qd" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.
  - classes with virtual methods need virtual destructors (compiler warning
    available, but not turned on yet.)

  Additionally, check for constructor/destructor style violations and reference
  members, as it is very convenient to do so while checking for
  gcc-2 compliance.

  This function is called once per line; the class-related checks keep their
  state between calls in class_state.classinfo_stack.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, error level, and message
  """

  # Remove comments from the line, but leave in strings for now.
  line = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated.  Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', line):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional.  Try rewriting to avoid them.')

  # Remove escaped backslashes before looking for undefined escapes.
  line = line.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', line):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes.  Unescape them.')

  # For the rest, work with both comments and strings removed.
  line = clean_lines.elided[linenum]

  # A type keyword followed by a storage class, e.g. "const static".
  if Search(r'\b(const|volatile|void|char|short|int|long'
            r'|float|double|signed|unsigned'
            r'|schar|u?int8|u?int16|u?int32|u?int64)'
            r'\s+(auto|register|static|extern|typedef)\b',
            line):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage class (static, extern, typedef, etc) should be first.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard.  Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid.  Remove this line.')

  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
            line):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
    # TODO(unknown): Could it be expanded safely to arbitrary references,
    # without triggering too many false positives? The first
    # attempt triggered 5 warnings for mostly benign code in the regtest, hence
    # the restriction.
    # Here's the original regexp, for the reference:
    # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
    # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
    error(filename, linenum, 'runtime/member_string_references', 2,
          'const string& members are dangerous. It is much better to use '
          'alternatives, such as pointers or simple constants.')

  # Track class entry and exit, and attempt to find cases within the
  # class declaration that don't meet the C++ style
  # guidelines. Tracking is very dependent on the code matching Google
  # style guidelines, but it seems to perform well enough in testing
  # to be a worthwhile addition to the checks.
  classinfo_stack = class_state.classinfo_stack
  # Look for a class declaration
  class_decl_match = Match(
      r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
  if class_decl_match:
    # Group 3 is the (possibly '::'-qualified) class name.
    classinfo_stack.append(_ClassInfo(class_decl_match.group(3), linenum))

  # Everything else in this function uses the top of the stack if it's
  # not empty.
  if not classinfo_stack:
    return

  classinfo = classinfo_stack[-1]

  # If the opening brace hasn't been seen look for it and also
  # parent class declarations.
  if not classinfo.seen_open_brace:
    # If the line has a ';' in it, assume it's a forward declaration or
    # a single-line class declaration, which we won't process.
    if line.find(';') != -1:
      classinfo_stack.pop()
      return
    classinfo.seen_open_brace = (line.find('{') != -1)
    # Look for a bare ':'
    if Search('(^|[^:]):($|[^:])', line):
      classinfo.is_derived = True
    if not classinfo.seen_open_brace:
      return  # Everything else in this function is for after open brace

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = classinfo.name.split('::')[-1]

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.
  args = Match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
               % re.escape(base_classname),
               line)
  # A lone 'void' parameter and copy constructors (a parameter that starts
  # with an optionally const reference to the class itself) are exempt.
  if (args and
      args.group(1) != 'void' and
      not Match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
                args.group(1).strip())):
    error(filename, linenum, 'runtime/explicit', 5,
          'Single-argument constructors should be marked explicit.')

  # Look for methods declared virtual.
  if Search(r'\bvirtual\b', line):
    classinfo.virtual_method_linenumber = linenum
    # Only look for a destructor declaration on the same line. It would
    # be extremely unlikely for the destructor declaration to occupy
    # more than one line.
    if Search(r'~%s\s*\(' % base_classname, line):
      classinfo.has_virtual_destructor = True

  # Look for class end.
  brace_depth = classinfo.brace_depth
  brace_depth = brace_depth + line.count('{') - line.count('}')
  if brace_depth <= 0:
    # The braces balance again: the class body is complete.
    classinfo = classinfo_stack.pop()
    # Try to detect missing virtual destructor declarations.
    # For now, only warn if a non-derived class with virtual methods lacks
    # a virtual destructor. This is to make it less likely that people will
    # declare derived virtual destructors without declaring the base
    # destructor virtual.
    if ((classinfo.virtual_method_linenumber is not None) and
        (not classinfo.has_virtual_destructor) and
        (not classinfo.is_derived)):  # Only warn for base classes
      error(filename, classinfo.linenum, 'runtime/virtual', 4,
            'The class %s probably needs a virtual destructor due to '
            'having virtual method(s), one declared at line %d.'
            % (classinfo.name, classinfo.virtual_method_linenumber))
  else:
    # Still inside the class body: remember the depth for the next line.
    classinfo.brace_depth = brace_depth
1483
1484
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Checks for the correctness of various spacing around function calls.

  All findings are reported under the 'whitespace/parens' category: extra
  space after '(', extra space before '(' in a call, and extra space
  before ')'.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Since function calls often occur inside if/for/while/switch
  # expressions - which have their own, more liberal conventions - we
  # first see if we should be looking inside such an expression for a
  # function call, to which we can apply more strict standards.
  fncall = line    # if there's no control flow construct, look at whole line
  # Only the first pattern that matches is used.
  for pattern in (r'\bif\s*\((.*)\)\s*{',
                  r'\bfor\s*\((.*)\)\s*{',
                  r'\bwhile\s*\((.*)\)\s*[{;]',
                  r'\bswitch\s*\((.*)\)\s*{'):
    match = Search(pattern, line)
    if match:
      fncall = match.group(1)    # look inside the parens for function calls
      break

  # Except in if/for/while/switch, there should never be space
  # immediately inside parens (eg "f( 3, 4 )").  We make an exception
  # for nested parens ( (a+b) + c ).  Likewise, there should never be
  # a space before a ( when it's a function argument.  I assume it's a
  # function argument when the char before the whitespace is legal in
  # a function name (alnum + _) and we're not starting a macro. Also ignore
  # pointers and references to arrays and functions coz they're too tricky:
  # we use a very simple way to recognize these:
  # " (something)(maybe-something)" or
  # " (something)(maybe-something," or
  # " (something)[something]"
  # Note that we assume the contents of [] to be short enough that
  # they'll never need to wrap.
  if (  # Ignore control structures.
      not Search(r'\b(if|for|while|switch|return|delete)\b', fncall) and
      # Ignore pointers/references to functions.
      not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
      # Ignore pointers/references to arrays.
      not Search(r' \([^)]+\)\[[^\]]+\]', fncall)):
    # The two "space after (" checks are mutually exclusive (if/elif); the
    # remaining checks below run independently of them.
    if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
      error(filename, linenum, 'whitespace/parens', 4,
            'Extra space after ( in function call')
    elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
      error(filename, linenum, 'whitespace/parens', 2,
            'Extra space after (')
    if (Search(r'\w\s+\(', fncall) and
        not Search(r'#\s*define|typedef', fncall)):
      error(filename, linenum, 'whitespace/parens', 4,
            'Extra space before ( in function call')
    # If the ) is followed only by a newline or a { + newline, assume it's
    # part of a control statement (if/while/etc), and don't complain
    if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
      error(filename, linenum, 'whitespace/parens', 2,
            'Extra space before )')
1543
1544
def IsBlankLine(line):
  """Tells whether a line carries no visible content.

  A line counts as blank when it is empty (or otherwise falsy) or when
  every character in it is whitespace.

  Args:
    line: The line of text to examine.

  Returns:
    True if the line is blank, False otherwise.
  """
  if not line:
    return True
  return line.isspace()
1558
1559
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Each call classifies exactly one line and updates function_state:
    * a line that opens a function body calls function_state.Begin(),
    * a line consisting only of an unindented '}' calls
      function_state.Check() followed by function_state.End(),
    * any other non-blank line calls function_state.Count().

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  # NOTE(review): raw and raw_line are assigned here but never read again
  # in this function.
  raw = clean_lines.raw_lines
  raw_line = raw[linenum]
  joined_line = ''

  starting_func = False
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if function_name == 'TEST' or function_name == 'TEST_F' or (
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # Scan forward from the current line until something settles whether
    # this is a declaration/trivial function or the start of a real body.
    for start_linenum in xrange(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                              # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        # The reported name is whatever word/colon run precedes the first
        # '(' on the starting line.
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):    # Handle TEST... macros
          # For TEST macros the parenthesized arguments become part of the
          # name passed to function_state.Begin().
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:             # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
1628
1629
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Flags badly formatted TODO comments.

  Comments that do not start with a TODO marker are ignored.  For those
  that do, three things are verified: at most one space before TODO, a
  parenthesized username after it, and a space (or the end of the comment)
  following the marker.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if todo is None:
    return

  spaces_before, owner, space_after = todo.groups()

  # A single space before TODO is right; a missing one is reported elsewhere.
  if len(spaces_before) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not owner:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # space_after is None when the marker is followed directly by text, and
  # anything other than a single space or end-of-comment is also rejected.
  if space_after not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
1661
1662
def CheckSpacing(filename, clean_lines, linenum, error):
  """Checks for the correctness of various spacing issues in the code.

  Things we check for: blank lines at the start or end of a code block,
  two spaces between code and a // comment and one space after the //,
  TODO comment format, spaces around = and comparison/shift operators,
  no space after unary operators, a space after if/for/while/switch,
  consistent spacing inside the parens of those statements, a space after
  commas, spacing in function calls, a space before {, no space before [,
  and no stray space before a trailing semicolon.

  The blank-line and comment checks look at the raw line; everything after
  that looks at the elided line (comments and strings removed).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  raw = clean_lines.raw_lines
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}').
  if IsBlankLine(line):
    elided = clean_lines.elided
    # NOTE(review): for linenum == 0 this index is -1, i.e. the last line of
    # the file -- presumably callers never pass a blank line 0; confirm.
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
        and prev_line[:prevbrace].find('namespace') == -1):
      # OK, we have a blank line at the start of a code block.  Before we
      # complain, we check if it is an exception to the rule: The previous
      # non-empty line has the parameters of a function header that are indented
      # 4 spaces (because they did not fit in a 80 column line when placed on
      # the same line as the function name).  We also check for the case where
      # the previous line is indented 6 spaces, which may happen when the
      # initializers of a constructor do not fit into a 80 column line.
      exception = False
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # We are looking for the opening column of initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        search_position = linenum-2
        while (search_position >= 0
               and Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        exception = (search_position >= 0
                     and elided[search_position][:5] == '    :')
      else:
        # Search for the function arguments or an initializer list.  We use a
        # simple heuristic here: If the line is indented 4 spaces; and we have a
        # closing paren, without the opening paren, followed by an opening brace
        # or colon (for initializer lists) we assume that it is the last line of
        # a function header.  If we have a colon indented 4 spaces, it is an
        # initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Blank line at the start of a code block.  Is this needed?')
    # This doesn't ignore whitespace at the end of a namespace block
    # because that is too hard without pairing open/close braces;
    # however, a special exception is made for namespace closing
    # brackets which have a comment containing "namespace".
    #
    # Also, ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      next_line = raw[linenum + 1]
      if (next_line
          and Match(r'\s*}', next_line)
          and next_line.find('namespace') == -1
          and next_line.find('} else ') == -1):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Blank line at the end of a code block.  Is this needed?')

  # Next, we complain if there's a comment too near the text
  commentpos = line.find('//')
  if commentpos != -1:
    # Check if the // may be in quotes.  If so, ignore it
    # (an even number of unescaped double quotes before the // means we are
    # outside a string literal).
    # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
    if (line.count('"', 0, commentpos) -
        line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
      # Allow one space for new scopes, two spaces otherwise:
      if (not Match(r'^\s*{ //', line) and
          ((commentpos >= 1 and
            line[commentpos-1] not in string.whitespace) or
           (commentpos >= 2 and
            line[commentpos-2] not in string.whitespace))):
        error(filename, linenum, 'whitespace/comments', 2,
              'At least two spaces is best between code and comments')
      # There should always be a space between the // and the comment
      commentend = commentpos + 2
      if commentend < len(line) and not line[commentend] == ' ':
        # but some lines are exceptions -- e.g. if they're big
        # comment delimiters like:
        # //----------------------------------------------------------
        # or are an empty C++ style Doxygen comment, like:
        # ///
        # or they begin with multiple slashes followed by a space:
        # //////// Header comment
        match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
                 Search(r'^/$', line[commentend:]) or
                 Search(r'^/+ ', line[commentend:]))
        if not match:
          error(filename, linenum, 'whitespace/comments', 4,
                'Should have a space between // and comment')
      CheckComment(line[commentpos:], filename, linenum, error)

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Don't try to do spacing checks for operator methods
  # NOTE(review): the replacement text contains a backslash before the
  # paren; confirm that the resulting 'operator\(' is what is intended.
  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)

  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not.  Note we only check for non-spaces on *both* sides;
  # sometimes people put non-spaces on one side when aligning ='s among
  # many lines (not that this is behavior that I approve of...)
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned.  It's hard to tell,
  # though, so we punt on this one for now.  TODO.

  # You should always have whitespace around binary operators.
  # Alas, we can't test < or > because they're legitimately used sans spaces
  # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
  # only if it's not template params list spilling into the next line.
  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
  if not match:
    # Note that while it seems that the '<[^<]*' term in the following
    # regexp could be simplified to '<.*', which would indeed match
    # the same class of strings, the [^<] means that searching for the
    # regexp takes linear rather than quadratic time.
    if not Search(r'<[^<]*,\s*$', line):  # template params spill
      match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))
  # We allow no-spaces around << and >> when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams)
  match = Search(r'[^0-9\s](<<|>>)[^0-9\s]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))

  # There shouldn't be space around unary operators
  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % match.group(1))

  # A pet peeve of mine: no spaces after an if, while, switch, or for
  match = Search(r' (if\(|for\(|while\(|switch\()', line)
  if match:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % match.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want: "if ( foo)" or "if ( foo   )".
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  # Groups: 1 = keyword, 2 = spaces after (, 3 = first character of the
  # condition, 4 = spaces before ).
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if match:
    if len(match.group(2)) != len(match.group(4)):
      if not (match.group(3) == ';' and
              len(match.group(2)) == 1 + len(match.group(4)) or
              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
        error(filename, linenum, 'whitespace/parens', 5,
              'Mismatching spaces inside () in %s' % match.group(1))
    if not len(match.group(2)) in [0, 1]:
      error(filename, linenum, 'whitespace/parens', 5,
            'Should have zero or one spaces inside ( and ) in %s' %
            match.group(1))

  # You should always have a space after a comma (either as fn arg or operator)
  if Search(r',[^\s]', line):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # Next we will look for issues with function calls.
  CheckSpacingForFunctionCall(filename, line, linenum, error)

  # Except after an opening paren, you should have spaces before your braces.
  # And since you should never have braces at the beginning of a line, this is
  # an easy test.
  if Search(r'[^ (]{', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'new char * []'.
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use { } instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use { } instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use { } instead.')
1893
1894
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Finds the closest non-blank elided line above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to start searching above.

  Returns:
    A (text, index) tuple for the nearest preceding line that is not blank.
    When every preceding line is blank (or there is none), the tuple is
    ('', -1).
  """
  candidate = linenum
  while candidate > 0:
    candidate -= 1
    text = clean_lines.elided[candidate]
    if IsBlankLine(text):
      continue
    return (text, candidate)
  return ('', -1)
1916
1917
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces and brace-related newline problems.

  Checks performed on the elided line: an opening brace alone on its line,
  an else that is not on the same line as the preceding }, an else with a
  brace on only one side, a statement on the same line as else or do, and
  a redundant ; after a closing brace.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone
    # is using braces in a block to explicitly create a new scope,
    # which is commonly used to control the lifetime of
    # stack-allocated variables.  We don't detect this perfectly: we
    # just don't complain if the last non-whitespace character on the
    # previous non-blank line is ';', ':', '{', or '}'.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if not Search(r'[;:}{]\s*$', prevline):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  if Match(r'\s*else\s*', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:    # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:            # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Braces shouldn't be followed by a ; unless they're defining a struct
  # or initializing an array.
  # We can't tell in general, but we can for some common cases.
  # To see the context, previous non-blank lines are prepended for as long
  # as the accumulated text still starts with an indented {...}; and the
  # line being prepended holds no ';'.
  prevlinenum = linenum
  while True:
    (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
    if prevlinenum < 0:
      # Ran out of earlier lines.  GetPreviousNonBlankLine keeps returning
      # ('', -1) from here on, and prepending '' leaves line unchanged, so
      # continuing could only spin forever.
      break
    if Match(r'\s+{.*}\s*;', line) and not prevline.count(';'):
      line = prevline + line
    else:
      break
  if (Search(r'{.*}\s*;', line) and
      line.count('{') == line.count('}') and
      not Search(r'struct|class|enum|\s*=\s*{', line)):
    error(filename, linenum, 'readability/braces', 4,
          "You don't need a ; after a }")
1990
1991
def ReplaceableCheck(operator, macro, line):
  """Tells whether a CHECK can be swapped for a comparison-specific macro.

  CHECK(a == b) is better written CHECK_EQ(a, b), and likewise for
  CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT and CHECK_NE.

  Args:
    operator: The C++ operator used in the CHECK.
    macro: The CHECK or EXPECT macro being called.
    line: The current source line.

  Returns:
    True if the CHECK can be replaced with a more specific one.  A falsy
    value (the failed match result, or False) otherwise.
  """

  # Decimal and hex integers, strings, and chars (in that order).
  match_constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

  # Only comparisons with something literal-looking on one side are
  # reported, since CHECK(x == iterator) won't compile as CHECK_EQ.  Some
  # replaceable CHECKs are missed this way, which beats emitting
  # extraneous warnings.
  fragments = [
      r'\s*', macro, r'\((\s*',
      match_constant, r'\s*', operator, r'[^<>].*|',
      r'.*[^<>]', operator, r'\s*', match_constant,
      r'\s*\))',
  ]
  match_this = ''.join(fragments)

  shaped_like_comparison = Match(match_this, line)
  if not shaped_like_comparison:
    return shaped_like_comparison

  # CHECK_EQ(x, NULL) won't compile (it requires a cast), and boolean
  # expressions built with && or || (e.g. CHECK(a == b || c == d)) are
  # left alone too.
  return not Search(r'NULL|&&|\|\|', line)
2025
2026
def CheckCheck(filename, clean_lines, linenum, error):
  """Suggests comparison-specific variants of CHECK and EXPECT macros.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # The first known macro that appears in the raw line decides which set
  # of replacements is suggested.
  raw_lines = clean_lines.raw_lines
  current_macro = ''
  for candidate in _CHECK_MACROS:
    if candidate in raw_lines[linenum]:
      current_macro = candidate
      break
  if not current_macro:
    # No CHECK or EXPECT on this line, so there is nothing more to do.
    return

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.  Only the
  # first operator that qualifies is reported.
  for operator in ('==', '!=', '>=', '>', '<=', '<'):
    if not ReplaceableCheck(operator, current_macro, line):
      continue
    suggestion = _CHECK_REPLACEMENT[current_macro][operator]
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              suggestion, current_macro, operator))
    return
2057       break
2058
2059
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  For byte strings this is
    simply len(line).
  """
  # type(u'') is `unicode` on Python 2 and `str` on Python 3.  Naming the
  # `unicode` builtin directly would raise NameError on Python 3.
  if not isinstance(line, type(u'')):
    return len(line)

  width = 0
  # Normalize first so that base + combining-mark pairs which have a
  # precomposed form are counted as a single character.
  for c in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(c) in ('W', 'F'):
      # Wide and fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(c):
      # Combining marks that remain after normalization take no column.
      width += 1
  return width
2080
2081
def CheckStyle(filename, clean_lines, linenum, file_extension, error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  After its own checks this function delegates to CheckBraces, CheckSpacing
  and CheckCheck, in that order, for the same line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    error: The function to call with any errors found.
  """

  raw_lines = clean_lines.raw_lines
  line = raw_lines[linenum]

  if line.find('\t') != -1:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  initial_spaces = 0
  cleansed_line = clean_lines.elided[linenum]
  # Count the leading spaces of the raw line.
  while initial_spaces < len(line) and line[initial_spaces] == ' ':
    initial_spaces += 1
  # The three checks below are mutually exclusive: at most one of
  # end_of_line, indent and labels is reported for a line.
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace.  Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for labels
  elif ((initial_spaces == 1 or initial_spaces == 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start.  '
          'Are you using a 2-space indent?')
  # Labels should always be indented at least one space.
  elif not initial_spaces and line[:2] != '//' and Search(r'[^:]:\s*$',
                                                          line):
    error(filename, linenum, 'whitespace/labels', 4,
          'Labels should always be indented at least one space.  '
          'If this is a member-initializer list in a constructor or '
          'the base class list in a class definition, the colon should '
          'be on the following line.')


  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    if (line.startswith('#ifndef %s' % cppvar) or
        line.startswith('#define %s' % cppvar) or
        line.startswith('#endif  // %s' % cppvar)):
      is_header_guard = True
  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  if (not line.startswith('#include') and not is_header_guard and
      not Match(r'^\s*//.*http(s?)://\S*$', line)):
    line_width = GetLineWidth(line)
    # Over 100 columns is reported with higher confidence than over 80.
    if line_width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif line_width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  if (cleansed_line.count(';') > 1 and
      # for loops are allowed two ;'s (and may run over two lines).
      cleansed_line.find('for') == -1 and
      (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
       GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
      # It's ok to have many commands in a switch case that fits in 1 line
      not ((cleansed_line.find('case ') != -1 or
            cleansed_line.find('default:') != -1) and
           cleansed_line.find('break;') != -1)):
    error(filename, linenum, 'whitespace/newline', 4,
          'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckSpacing(filename, clean_lines, linenum, error)
  CheckCheck(filename, clean_lines, linenum, error)
2178
2179
# Matches '#include', one or more spaces, then a double-quoted name that
# ends in '.h' and contains no '/' (e.g. '#include "foo.h"').
# NOTE(review): presumably used to spot includes that omit the directory;
# confirm against the caller.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter (< or ")
# and group 2 is the text between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s, _s and .s.
# That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
2188
2189
def _DropCommonSuffixes(filename):
  """Strips a well-known trailing suffix (or the extension) from filename.

  A suffix is only recognized when it is preceded by '-' or '_'; that
  separator is removed together with the suffix.  When no known suffix
  applies, only the file extension is dropped.

  For example:
    >>> _DropCommonSuffixes('foo/foo-inl.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/bar/foo.cc')
    'foo/bar/foo'
    >>> _DropCommonSuffixes('foo/foo_internal.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
    'foo/foo_unusualinternal'

  Args:
    filename: The input filename.

  Returns:
    The filename with the common suffix removed.
  """
  known_suffixes = ('test.cc', 'regtest.cc', 'unittest.cc',
                    'inl.h', 'impl.h', 'internal.h')
  for candidate in known_suffixes:
    # Number of trailing characters to cut: the suffix plus its separator.
    cut = len(candidate) + 1
    if len(filename) < cut or not filename.endswith(candidate):
      continue
    if filename[-cut] in ('-', '_'):
      return filename[:-cut]
  stem, _ = os.path.splitext(filename)
  return stem
2215
2216
def _IsTestFilename(filename):
  """Tells whether filename carries one of the recognized test suffixes.

  Args:
    filename: The input filename.

  Returns:
    True if 'filename' ends with '_test.cc', '_unittest.cc' or
    '_regtest.cc'; False otherwise.
  """
  for test_suffix in ('_test.cc', '_unittest.cc', '_regtest.cc'):
    if filename.endswith(test_suffix):
      return True
  return False
2232
2233
def _ClassifyInclude(fileinfo, include, is_system):
  """Decides which category of header 'include' belongs to.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # Angle-bracket includes are system headers: C++ ones if they appear in
  # either of the known C++ header collections, C ones otherwise.
  if is_system:
    if include in _STL_HEADERS or include in _CPP_HEADERS:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # Compare the file being linted and the include after stripping common
  # suffixes.  Same basename in the same directory (or in the sibling
  # 'public' directory) means the header most likely belongs to this file.
  target_path = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_path)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    public_dir = os.path.normpath(target_dir + '/../public')
    if include_dir == target_dir or include_dir == public_dir:
      return _LIKELY_MY_HEADER

  # Sharing only the leading basename component is weaker evidence: the
  # header is allowed to come first, but its absence is never reported.
  target_lead = _RE_FIRST_COMPONENT.match(target_base)
  include_lead = _RE_FIRST_COMPONENT.match(include_base)
  if target_lead and include_lead:
    if target_lead.group(0) == include_lead.group(0):
      return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
2292
2293
2294
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Runs the checks that only make sense on #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # Quoted headers are expected to be named with their directory
  # ("foo/bar.h" rather than just "bar.h").
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # Every remaining check needs the parsed include; the pattern is anchored
  # at the start of the line, so one lookup serves all of them.
  include_match = _RE_PATTERN_INCLUDE.search(line)
  if not include_match:
    return
  include = include_match.group(2)
  uses_angle_brackets = (include_match.group(1) == '<')

  if include in include_state:
    # Including a file twice is occasionally legitimate, but in general it
    # is a mistake.
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    include_state[include] = linenum

    # Headers are expected in this order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # The include is classified as one of those kinds and handed to
    # include_state, which returns a message when the order is violated.
    header_kind = _ClassifyInclude(fileinfo, include, uses_angle_brackets)
    order_problem = include_state.CheckNextIncludeOrder(header_kind)
    if order_problem:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (order_problem, fileinfo.BaseName()))
    if not include_state.IsInAlphabeticalOrder(include):
      error(filename, linenum, 'build/include_alpha', 4,
            'Include "%s" not in alphabetical order' % include)

  # Look for any of the stream classes that are part of standard C++.
  # Many unit tests use cout, so test files are exempt.
  if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
2362
def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
                  error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  The checks are independent of each other, so a single line may produce
  several errors.  An #include line is handed to CheckIncludeLine and gets
  none of the other checks; an empty elided line is skipped entirely.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Check for non-const references in functions.  This is tricky because &
  # is also used to take the address of something.  We allow <> for templates,
  # (ignoring whatever is between the braces) and : for classes.
  # These are complicated re's.  They try to capture the following:
  # paren (for fn-prototype start), typename, &, varname.  For the const
  # version, we're willing for const to be before typename or after.
  # The line is flagged when it has more reference parameters than const
  # reference parameters (counting both const placements).
  # Don't check the implementation on same line.
  fnline = line.split('{', 1)[0]
  if (len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) >
      len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
                     r'(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) +
      len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+\s+const(\s?&|&\s?)[\w]+',
                     fnline))):

    # We allow non-const references in a few standard places, like functions
    # called "swap()" or iostream operators like "<<" or ">>".
    if not Search(
        r'(swap|Swap|operator[<>][<>])\s*\(\s*(?:[\w:]|<.*>)+\s*&',
        fnline):
      error(filename, linenum, 'runtime/references', 2,
            'Is this a non-const reference? '
            'If so, make const or use a pointer.')

  # Check to see if they're using a conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
  if match:
    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc.  Without context they are
    # virtually indistinguishable from int(x) casts.
    if (match.group(1) is None and  # If new operator, then this isn't a cast
        not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line)):
      error(filename, linenum, 'readability/casting', 4,
            'Using deprecated casting style.  '
            'Use static_cast<%s>(...) instead' %
            match.group(2))

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)',
                  error)
  # This doesn't catch all cases.  Consider (const char * const)"hello".
  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast.  This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  if Search(
      r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast?  '
           'This is dangerous: could be a temp var.  '
           'Take the address before doing the cast, rather than after'))

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                         match.group(3)):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".' %
          (match.group(1), match.group(2)))

  # Check that we're not using RTTI outside of testing code.
  if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
    error(filename, linenum, 'runtime/rtti', 5,
          'Do not use dynamic_cast<>.  If you need to cast within a class '
          "hierarchy, use static_cast<> to upcast.  Google doesn't support "
          'RTTI.')

  # A member named with a trailing underscore that is initialized from
  # itself, e.g. "foo_(foo_)".
  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  # Header-only checks that are not implemented yet:
  # TODO(unknown): check that 1-arg constructors are explicit.
  #                How to tell it's a constructor?
  #                (handled in CheckForNonStandardConstructs for now)
  # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
  #                (level 1 error)

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match and match.group(2) != '0':
    # If 2nd arg is zero, snprintf is used to calculate size.
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf.  Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  if Search(r'\bsscanf\b', line):
    error(filename, linenum, 'runtime/printf', 1,
          'sscanf can be ok, but is slow and can overflow buffers.')

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous.  Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  match = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Potential format string bug. Do %s("%%s", %s) instead.'
          % (match.group(1), match.group(2)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  # NOTE(review): the alternation below is not grouped, so '^' binds only to
  # the first alternative and '$' only to the last; any second argument that
  # merely starts with digits is accepted.  Confirm whether that is intended
  # before tightening it.
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives.  '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.  ('>>' is a delimiter in its own right; the size
    # text can never contain ']' here, so it must not be part of the
    # delimiter.)
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays.  Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    if not Search(r'^\s*};', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files.  See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
2629
2630
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Looks for a C-style cast matching pattern and reports what it finds.

  A match is reported as one of three things, depending on its context:
  a sizeof(type) use, an unnamed function parameter, or a real cast.  At
  most one error is emitted per call.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast or static_cast, depending.
    pattern: The regular expression used to find C-style casts.
    error: The function to call with any errors found.
  """
  cast_match = Search(pattern, line)
  if not cast_match:
    return

  # e.g., sizeof(int): look at the text that ends one character before
  # group 1 (for the patterns visible in this file, that character is the
  # opening parenthesis).
  text_before = line[0:cast_match.start(1) - 1]
  if Match(r'.*sizeof\s*$', text_before):
    error(filename, linenum, 'runtime/sizeof', 1,
          'Using sizeof(type).  Use sizeof(varname) instead if possible')
    return

  # What follows the parenthesized type tells declarations from casts:
  #  - a close paren: function pointer as an argument to a function,
  #    eg, void foo(void (*bar)(int));
  #  - a semicolon, brace or throw(), optionally after const: a function
  #    declaration or definition, or possibly a function pointer typedef,
  #    eg, void foo(int); or void foo(int) const;
  #  - an equals sign: function pointer assignment,
  #    eg, void *(*foo)(int) = ...
  #
  # Right now, this will only catch cases where there's a single argument, and
  # it's unnamed.  It should probably be expanded to check for multiple
  # arguments with some unnamed.
  text_after = line[cast_match.end(0):]
  declaration_match = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))',
                            text_after)
  if not declaration_match:
    # At this point, all that should be left is actual casts.
    error(filename, linenum, 'readability/casting', 4,
          'Using C-style cast.  Use %s<%s>(...) instead' %
          (cast_type, cast_match.group(1)))
    return

  # A definition ('{' or 'throw()') is let through when the raw line holds
  # a '/*' comment; every other declaration form is reported.
  terminator = declaration_match.group(3)
  if not terminator or terminator == ';' or '/*' not in raw_line:
    error(filename, linenum, 'readability/function', 3,
          'All parameters should be named in a function')
2684
2685
# Pairs of (header, names of the templates that header provides).  Used
# below to build the patterns that detect a template being used.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', ('unary_function', 'binary_function',
                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
                      'negate',
                      'equal_to', 'not_equal_to', 'greater', 'less',
                      'greater_equal', 'less_equal',
                      'logical_and', 'logical_or', 'logical_not',
                      'unary_negate', 'not1', 'binary_negate', 'not2',
                      'bind1st', 'bind2nd',
                      'pointer_to_unary_function',
                      'pointer_to_binary_function',
                      'ptr_fun',
                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
                      'mem_fun_ref_t',
                      'const_mem_fun_t', 'const_mem_fun1_t',
                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
                      'mem_fun_ref',
                     )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )

# Maps a template entity (in the 'name<>' form used by _re_pattern_templates)
# to headers whose inclusion is accepted in place of the entity's own header.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
}

# Matches the bare word 'string'.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# List of (compiled pattern, function name, '<algorithm>') triples, one per
# <algorithm> function that is looked for.
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
  # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
  # type::max().
  _re_pattern_algorithm_header.append(
      (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
       _template,
       '<algorithm>'))

# List of (compiled pattern, 'name<>', header) triples, one per template
# listed in _HEADERS_CONTAINING_TEMPLATES.  Each pattern matches the template
# name followed by '<'.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _re_pattern_templates.append(
        (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
         _template + '<>',
         _header))
2747
2748
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decides whether a .cc file and a header belong to the same module.

  Both names are reduced to a module stem: the extension is removed, a
  trailing '_unittest' or '_test' (for the .cc) or '-inl' (for the header)
  is removed, and '/public/' and '/internal/' path components are collapsed
  to '/'.  So foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc in
  one directory share a module, and so do some/path/public/xyzzy and
  some/path/internal/xyzzy.

  The two files match when the .cc stem ends with the header stem.  If
  filename_cc has a longer path than filename_h, for example
  '/absolute/path/to/base/sysinfo.cc' against 'base/sysinfo.h', the leading
  part that the header lacks is returned too, so that the caller can use it
  to open the header.  The real include paths are not available in this
  context, hence the guesswork.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  if not filename_cc.endswith('.cc'):
    return (False, '')
  if not filename_h.endswith('.h'):
    return (False, '')

  # Reduce the .cc name to its module stem; at most one test suffix is
  # removed, '_unittest' taking precedence.
  cc_stem = filename_cc[:-len('.cc')]
  for test_suffix in ('_unittest', '_test'):
    if cc_stem.endswith(test_suffix):
      cc_stem = cc_stem[:-len(test_suffix)]
      break

  # Same for the header, whose only special suffix is '-inl'.
  h_stem = filename_h[:-len('.h')]
  if h_stem.endswith('-inl'):
    h_stem = h_stem[:-len('-inl')]

  for visibility_dir in ('/public/', '/internal/'):
    cc_stem = cc_stem.replace(visibility_dir, '/')
    h_stem = h_stem.replace(visibility_dir, '/')

  if not cc_stem.endswith(h_stem):
    return False, ''
  return True, cc_stem[:-len(h_stem)]
2802
2803
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Every #include found in the file (after comments are stripped from each
  line) is added to include_state unless it is already a key there.  The
  file is closed before returning, including when reading it fails part-way.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if the header could be opened. False if opening it raised IOError.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False

  try:
    linenum = 0
    for line in headerfile:
      linenum += 1
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        include_state.setdefault(include, '%s:%d' % (filename, linenum))
  finally:
    # The io factory is injectable, so the returned object is not guaranteed
    # to be a real file; only close it when it knows how to be closed.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
2831
2832
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports STL headers that are used but not included.

  Warnings are emitted so that the headers needed for the stl containers
  and functions in use get included. Only one reason is given per header.
  For example, if both equal_to<> and less<> are used in a .h file, only one
  of them (the latter in the file) is reported as the reason to include
  <functional>.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  # Maps a header name to the line number and the template entity that
  # make it necessary.
  # Example of required: { '<functional>': (1219, 'less<>') }
  required = {}

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    if not line or line.startswith('#'):
      continue

    # String is special -- it is a non-templatized type in STL.
    string_match = _RE_PATTERN_STRING.search(line)
    if string_match:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      text_before = line[:string_match.start()]
      if text_before.endswith('std::') or not text_before.endswith('::'):
        required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # Template uses always contain a '<'; testing for it first is only a
    # speed up that skips most lines, no semantics are changed.
    if '<' in line:
      for pattern, template, header in _re_pattern_templates:
        if pattern.search(line):
          required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc. Here, we will look at possible includes.
  # Work on a copy of include_state so the caller's object is left alone.
  include_state = include_state.copy()

  # Use the absolute path so that matching works properly.
  #
  # For Emacs's flymake: when cpplint is invoked from flymake, a temporary
  # file is generated whose name might end with '_flymake.cc'. Restore the
  # original file name so that the corresponding header file can be found.
  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
  # instead of 'foo_flymake.h'
  abs_filename = re.sub(r'_flymake\.cc$', '.cc', os.path.abspath(filename))

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # include_state is modified during iteration, so we iterate over a copy of
  # the keys.
  for header in include_state.keys():  #NOLINT
    same_module, common_path = FilesBelongToSameModule(abs_filename, header)
    if not same_module:
      continue
    if UpdateIncludeState(common_path + header, include_state, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look. In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if not header_found and filename.endswith('.cc'):
    return

  # All the lines have been processed, report the errors found.
  for required_header_unstripped in required:
    found_linenum, template = required[required_header_unstripped]

    # Some entities are considered provided by another header that is
    # already included (see _HEADERS_ACCEPTED_BUT_NOT_PROMOTED).
    provided_elsewhere = False
    if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
      for accepted_header in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]:
        if accepted_header in include_state:
          provided_elsewhere = True
          break
    if provided_elsewhere:
      continue

    if required_header_unstripped.strip('<>"') not in include_state:
      error(filename, found_linenum,
            'build/include_what_you_use', 4,
            'Add #include ' + required_header_unstripped + ' for ' + template)
2927
2928
def ProcessLine(filename, file_extension,
                clean_lines, line, include_state, function_state,
                class_state, error):
  """Runs each of the per-line lint checks on one line of the file.

  Args:
    filename: Name of the file being linted.
    file_extension: Extension of the file, without the leading dot.
    clean_lines: Holder of the file's lines; its raw_lines attribute is
                 indexed here and the object itself is handed to each check.
    line: Index of the line to check.
    include_state: An _IncludeState instance, forwarded to CheckLanguage.
    function_state: A _FunctionState instance, forwarded to
                    CheckForFunctionLengths.
    class_state: A _ClassState instance, forwarded to
                 CheckForNonStandardConstructs.
    error: A callable used to report problems. Elsewhere in this file it is
           called as error(filename, linenum, category, confidence, message).
  """
  # Suppression markers are parsed from clean_lines.raw_lines, and this runs
  # before any of the checks below.
  ParseNolintSuppressions(filename, clean_lines.raw_lines[line], line, error)

  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, error)
  CheckLanguage(
      filename, clean_lines, line, file_extension, include_state, error)
  CheckForNonStandardConstructs(
      filename, clean_lines, line, class_state, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)


2959
2960
def ProcessFileData(filename, file_extension, lines, error):
  """Lints the contents of one file and reports problems through error.

  Args:
    filename: Name of the file being linted.
    file_extension: Extension of the file, without the leading dot.
    lines: A list of strings, one per line of the file, with the last
           element being empty if the file is terminated with a newline.
           The caller's list is not modified; a padded copy is used.
    error: A callable used to report problems. Elsewhere in this file it is
           called as error(filename, linenum, category, confidence, message).
  """
  # Pad with one marker line at each end, so that real line N sits at index N
  # and the final entry is a known value.
  leading_marker = '// marker so line numbers and indices both start at 1'
  trailing_marker = '// marker so line numbers end in a known way'
  lines = [leading_marker] + lines + [trailing_marker]

  include_state = _IncludeState()
  function_state = _FunctionState()
  class_state = _ClassState()

  ResetNolintSuppressions()

  # These checks receive the padded lines before multi-line comments are
  # removed.
  CheckForCopyright(filename, lines, error)
  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for linenum in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, linenum,
                include_state, function_state, class_state, error)
  class_state.CheckFinished(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # These two checks are given lines rather than clean_lines, so they are
  # done here instead of inside ProcessLine.
  CheckForUnicodeReplacementCharacters(filename, lines, error)
  CheckForNewlineAtEOF(filename, lines, error)

2999
def ProcessFile(filename, vlevel):
  """Does google-lint on a single file.

  Reads the file (or stdin when filename is '-'), strips trailing carriage
  returns, and runs the lint checks on files with a .cc, .h or .cpp
  extension. Progress and skip notices are written to sys.stderr.

  Args:
    filename: The name of the file to parse, or '-' to read from stdin.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.
  """

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    if filename == '-':
      contents = codecs.StreamReaderWriter(sys.stdin,
                                           codecs.getreader('utf8'),
                                           codecs.getwriter('utf8'),
                                           'replace').read()
    else:
      source = codecs.open(filename, 'r', 'utf8', 'replace')
      # Nested try/finally (rather than 'with') keeps this compatible with
      # the old interpreter named in the shebang while still guaranteeing
      # that the handle is closed.
      try:
        contents = source.read()
      finally:
        source.close()
  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  lines = contents.split('\n')

  # Remove trailing '\r'.  If it is not expected to be present (i.e.
  # os.linesep != '\r\n' as in Windows), a warning is issued below.
  carriage_return_found = False
  for linenum, text in enumerate(lines):
    if text.endswith('\r'):
      lines[linenum] = text.rstrip('\r')
      carriage_return_found = True

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if filename != '-' and file_extension not in ('cc', 'h', 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputing only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)


3061
3062
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  Args:
    message: An error message, or a false value (such as None) for none.
             When given, it is passed to sys.exit prefixed with
             'FATAL ERROR: '; otherwise the exit status is 1.
  """
  sys.stderr.write(_USAGE)
  if not message:
    sys.exit(1)
  sys.exit('\nFATAL ERROR: ' + message)


3074
3075
def PrintCategories():
  """Writes every entry of _ERROR_CATEGORIES to stderr and exits with 0.

  Each category is written on its own line, indented by two spaces.
  These are the categories used to filter messages via --filter.
  """
  listing = ['  %s\n' % category for category in _ERROR_CATEGORIES]
  sys.stderr.write(''.join(listing))
  sys.exit(0)


3083
3084
def ParseArguments(args):
  """Parses the command line arguments.

  As side-effects, this sets the output format, verbosity level, filters and
  counting style. Invalid input ends the program through PrintUsage, and an
  empty --filter value ends it through PrintCategories.

  Args:
    args: The command line arguments, without the program name.

  Returns:
    The list of filenames to lint.
  """
  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  # Start from the current settings so that options which are not given on
  # the command line keep their existing output format and verbosity.
  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7'):
        PrintUsage('The only allowed output formats are emacs and vs7.')
      output_format = val
    elif opt == '--verbose':
      # Report a non-numeric level as a usage error rather than letting the
      # ValueError escape as a traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('The verbose level must be an integer.')
    elif opt == '--filter':
      filters = val
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames


3135
3136
def main():
  """Lints every file named on the command line, then exits.

  sys.exit is called with True when the recorded error count is above zero
  and with False otherwise.
  """
  files_to_lint = ParseArguments(sys.argv[1:])

  # Change stderr to write with replacement characters so we don't die
  # if we try to print something containing non-ASCII characters.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCounts()
  for path in files_to_lint:
    ProcessFile(path, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  found_errors = _cpplint_state.error_count > 0
  sys.exit(found_errors)


3153
3154
# Run the linter only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
3157 @