1#!/usr/bin/env python
2# pylint: skip-file
3#
4# Copyright (c) 2009 Google Inc. All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met:
9#
10#    * Redistributions of source code must retain the above copyright
11# notice, this list of conditions and the following disclaimer.
12#    * Redistributions in binary form must reproduce the above
13# copyright notice, this list of conditions and the following disclaimer
14# in the documentation and/or other materials provided with the
15# distribution.
16#    * Neither the name of Google Inc. nor the names of its
17# contributors may be used to endorse or promote products derived from
18# this software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32"""Does google-lint on c++ files.
33
34The goal of this script is to identify places in the code that *may*
35be in non-compliance with google style.  It does not attempt to fix
36up these problems -- the point is to educate.  It does also not
37attempt to find all problems, or to ensure that everything it does
38find is legitimately a problem.
39
40In particular, we can get very confused by /* and // inside strings!
41We do a small hack, which is to ignore //'s with "'s after them on the
42same line, but it is far from perfect (in either direction).
43"""
44
45import codecs
46import copy
47import getopt
48import math  # for log
49import os
50import re
51import sre_compile
52import string
53import sys
54import unicodedata
55import sysconfig
56
# Compatibility shim: if the name xrange is not defined (NameError), bind it
# to range so that code referring to xrange works on either major version.
try:
  xrange          # Python 2
except NameError:
  xrange = range  # Python 3
61
62
# Help text describing the command-line syntax, the flags and the CPPLINT.cfg
# options.  This is a regular (non-raw) string, so the literal backslash in
# the exclude_files example is written as '\\' to avoid an invalid escape
# sequence; the resulting text still reads '.*\.cc'.
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
                   [--counting=total|toplevel|detailed] [--root=subdir]
                   [--linelength=digits] [--headers=x,y,...]
                   [--quiet]
        <file> [file] ...

  The style guidelines this tries to follow are those in
    https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

  Every problem is given a confidence score from 1-5, with 5 meaning we are
  certain of the problem, and 1 meaning it could be a legitimate construct.
  This will miss some errors, and is not a substitute for a code review.

  To suppress false-positive errors of a certain category, add a
  'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
  suppresses errors of all categories on that line.

  The files passed in will be linted; at least one file must be provided.
  Default linted extensions are .cc, .cpp, .cu, .cuh and .h.  Change the
  extensions with the --extensions flag.

  Flags:

    output=vs7
      By default, the output is formatted to ease emacs parsing.  Visual Studio
      compatible output (vs7) may also be used.  Other formats are unsupported.

    verbose=#
      Specify a number 0-5 to restrict errors to certain verbosity levels.

    quiet
      Don't print anything if no errors are found.

    filter=-x,+y,...
      Specify a comma-separated list of category-filters to apply: only
      error messages whose category names pass the filters will be printed.
      (Category names are printed with the message and look like
      "[whitespace/indent]".)  Filters are evaluated left to right.
      "-FOO" and "FOO" means "do not print categories that start with FOO".
      "+FOO" means "do print categories that start with FOO".

      Examples: --filter=-whitespace,+whitespace/braces
                --filter=whitespace,runtime/printf,+runtime/printf_format
                --filter=-,+build/include_what_you_use

      To see a list of all the categories used in cpplint, pass no arg:
         --filter=

    counting=total|toplevel|detailed
      The total number of errors found is always printed. If
      'toplevel' is provided, then the count of errors in each of
      the top-level categories like 'build' and 'whitespace' will
      also be printed. If 'detailed' is provided, then a count
      is provided for each category like 'build/class'.

    root=subdir
      The root directory used for deriving header guard CPP variable.
      By default, the header guard CPP variable is calculated as the relative
      path to the directory that contains .git, .hg, or .svn.  When this flag
      is specified, the relative path is calculated from the specified
      directory. If the specified directory does not exist, this flag is
      ignored.

      Examples:
        Assuming that top/src/.git exists (and cwd=top/src), the header guard
        CPP variables for top/src/chrome/browser/ui/browser.h are:

        No flag => CHROME_BROWSER_UI_BROWSER_H_
        --root=chrome => BROWSER_UI_BROWSER_H_
        --root=chrome/browser => UI_BROWSER_H_
        --root=.. => SRC_CHROME_BROWSER_UI_BROWSER_H_

    linelength=digits
      This is the allowed line length for the project. The default value is
      80 characters.

      Examples:
        --linelength=120

    extensions=extension,extension,...
      The allowed file extensions that cpplint will check

      Examples:
        --extensions=hpp,cpp

    headers=x,y,...
      The header extensions that cpplint will treat as .h in checks. Values are
      automatically added to --extensions list.

      Examples:
        --headers=hpp,hxx
        --headers=hpp

    cpplint.py supports per-directory configurations specified in CPPLINT.cfg
    files. CPPLINT.cfg file can contain a number of key=value pairs.
    Currently the following options are supported:

      set noparent
      filter=+filter1,-filter2,...
      exclude_files=regex
      linelength=80
      root=subdir
      headers=x,y,...

    "set noparent" option prevents cpplint from traversing directory tree
    upwards looking for more .cfg files in parent directories. This option
    is usually placed in the top-level project directory.

    The "filter" option is similar in function to --filter flag. It specifies
    message filters in addition to the |_DEFAULT_FILTERS| and those specified
    through --filter command-line flag.

    "exclude_files" allows to specify a regular expression to be matched against
    a file name. If the expression matches, the file is skipped and not run
    through the linter.

    "linelength" allows to specify the allowed line length for the project.

    The "root" option is similar in function to the --root flag (see example
    above). Paths are relative to the directory of the CPPLINT.cfg.

    The "headers" option is similar in function to the --headers flag
    (see example above).

    CPPLINT.cfg has an effect on files in the same directory and all
    sub-directories, unless overridden by a nested configuration file.

      Example file:
        filter=-build/include_order,+build/include_alpha
        exclude_files=.*\\.cc

    The above example disables build/include_order warning and enables
    build/include_alpha as well as excludes all .cc from being
    processed by linter, in the current directory (where the .cfg
    file is located) and all sub-directories.
"""
200
# We categorize each error message we print.  Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here!  cpplint_unittest.py should tell you if you forget to do this.
#
# Each entry has the form '<top-level>/<detail>'.  ParseNolintSuppressions
# checks NOLINT(category) comments against this list, and
# _CppLintState.IncrementErrorCount uses the part before the '/' when
# counting in 'toplevel' mode.
_ERROR_CATEGORIES = [
    'build/class',
    'build/c++11',
    'build/c++14',
    'build/c++tr1',
    'build/deprecated',
    'build/endif_comment',
    'build/explicit_make_pair',
    'build/forward_decl',
    'build/header_guard',
    'build/include',
    'build/include_alpha',
    'build/include_order',
    'build/include_what_you_use',
    'build/namespaces',
    'build/printf_format',
    'build/storage_class',
    'legal/copyright',
    'readability/alt_tokens',
    'readability/braces',
    'readability/casting',
    'readability/check',
    'readability/constructors',
    'readability/fn_size',
    'readability/inheritance',
    'readability/multiline_comment',
    'readability/multiline_string',
    'readability/namespace',
    'readability/nolint',
    'readability/nul',
    'readability/strings',
    'readability/todo',
    'readability/utf8',
    'runtime/arrays',
    'runtime/casting',
    'runtime/explicit',
    'runtime/int',
    'runtime/init',
    'runtime/invalid_increment',
    'runtime/member_string_references',
    'runtime/memset',
    'runtime/indentation_namespace',
    'runtime/operator',
    'runtime/printf',
    'runtime/printf_format',
    'runtime/references',
    'runtime/string',
    'runtime/threadsafe_fn',
    'runtime/vlog',
    'whitespace/blank_line',
    'whitespace/braces',
    'whitespace/comma',
    'whitespace/comments',
    'whitespace/empty_conditional_body',
    'whitespace/empty_if_body',
    'whitespace/empty_loop_body',
    'whitespace/end_of_line',
    'whitespace/ending_newline',
    'whitespace/forcolon',
    'whitespace/indent',
    'whitespace/line_length',
    'whitespace/newline',
    'whitespace/operators',
    'whitespace/parens',
    'whitespace/semicolon',
    'whitespace/tab',
    'whitespace/todo',
    ]
273
# Categories that cpplint no longer enforces.  They are kept so that NOLINT
# comments which still name them are not reported as unknown categories.
_LEGACY_ERROR_CATEGORIES = ['readability/streams', 'readability/function']

# Starting state of the category filter; the --filter= flag overrides it.
# Every category is on by default, so list here only the categories that
# must be switched on explicitly through --filter=.  Entries use the same
# syntax as the flag and therefore begin with '-' or '+'.
_DEFAULT_FILTERS = [
    '-build/include_alpha',
    ]

# Categories suppressed by default for C (not C++) files.
_DEFAULT_C_SUPPRESSED_CATEGORIES = ['readability/casting']

# Categories suppressed by default for Linux Kernel files.
_DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = ['whitespace/tab']
296
297# We used to check for high-bit characters, but after much discussion we
298# decided those were OK, as long as they were in UTF-8 and didn't represent
299# hard-coded international strings, which belong in a separate i18n file.
300
# C++ headers
# Immutable set of header names, grouped below into legacy (pre-standard)
# names, C++ library headers, and the C++ wrappers for C library facilities.
# A few names (e.g. 'complex', 'deque') exist both with and without a '.h'
# suffix; those are distinct entries.
_CPP_HEADERS = frozenset([
    # Legacy
    'algobase.h',
    'algo.h',
    'alloc.h',
    'builtinbuf.h',
    'bvector.h',
    'complex.h',
    'defalloc.h',
    'deque.h',
    'editbuf.h',
    'fstream.h',
    'function.h',
    'hash_map',
    'hash_map.h',
    'hash_set',
    'hash_set.h',
    'hashtable.h',
    'heap.h',
    'indstream.h',
    'iomanip.h',
    'iostream.h',
    'istream.h',
    'iterator.h',
    'list.h',
    'map.h',
    'multimap.h',
    'multiset.h',
    'ostream.h',
    'pair.h',
    'parsestream.h',
    'pfstream.h',
    'procbuf.h',
    'pthread_alloc',
    'pthread_alloc.h',
    'rope',
    'rope.h',
    'ropeimpl.h',
    'set.h',
    'slist',
    'slist.h',
    'stack.h',
    'stdiostream.h',
    'stl_alloc.h',
    'stl_relops.h',
    'streambuf.h',
    'stream.h',
    'strfile.h',
    'strstream.h',
    'tempbuf.h',
    'tree.h',
    'type_traits.h',
    'vector.h',
    # 17.6.1.2 C++ library headers
    'algorithm',
    'array',
    'atomic',
    'bitset',
    'chrono',
    'codecvt',
    'complex',
    'condition_variable',
    'deque',
    'exception',
    'forward_list',
    'fstream',
    'functional',
    'future',
    'initializer_list',
    'iomanip',
    'ios',
    'iosfwd',
    'iostream',
    'istream',
    'iterator',
    'limits',
    'list',
    'locale',
    'map',
    'memory',
    'mutex',
    'new',
    'numeric',
    'ostream',
    'queue',
    'random',
    'ratio',
    'regex',
    'scoped_allocator',
    'set',
    'sstream',
    'stack',
    'stdexcept',
    'streambuf',
    'string',
    'strstream',
    'system_error',
    'thread',
    'tuple',
    'typeindex',
    'typeinfo',
    'type_traits',
    'unordered_map',
    'unordered_set',
    'utility',
    'valarray',
    'vector',
    # 17.6.1.2 C++ headers for C library facilities
    'cassert',
    'ccomplex',
    'cctype',
    'cerrno',
    'cfenv',
    'cfloat',
    'cinttypes',
    'ciso646',
    'climits',
    'clocale',
    'cmath',
    'csetjmp',
    'csignal',
    'cstdalign',
    'cstdarg',
    'cstdbool',
    'cstddef',
    'cstdint',
    'cstdio',
    'cstdlib',
    'cstring',
    'ctgmath',
    'ctime',
    'cuchar',
    'cwchar',
    'cwctype',
    ])
437
# Type names
# Fully anchored pattern (^...$): it is meant to match a string that is, as a
# whole, one of the type names below.  The bracketed comments name the
# section each group of names comes from.
# NOTE(review): the last alternative before ')$' is empty (the pattern text
# ends with '|)$'), so the empty string matches as well -- confirm whether
# callers rely on that before changing it.
_TYPES = re.compile(
    r'^(?:'
    # [dcl.type.simple]
    r'(char(16_t|32_t)?)|wchar_t|'
    r'bool|short|int|long|signed|unsigned|float|double|'
    # [support.types]
    r'(ptrdiff_t|size_t|max_align_t|nullptr_t)|'
    # [cstdint.syn]
    r'(u?int(_fast|_least)?(8|16|32|64)_t)|'
    r'(u?int(max|ptr)_t)|'
    r')$')
450
451
# These headers are excluded from [build/include] and [build/include_order]
# checks:
# - Anything not following google file name conventions (containing an
#   uppercase character, such as Python.h or nsStringAPI.h, for example).
# - Lua headers.
# The pattern is anchored at both ends and never allows a '/', so it only
# matches a bare header name without any directory component.
_THIRD_PARTY_HEADERS_PATTERN = re.compile(
    r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$')

# Pattern for matching FileInfo.BaseName() against test file name
# This is the regex source text (a str), not a compiled pattern, and it is
# anchored at the end only.
_TEST_FILE_SUFFIX = r'(_test|_unittest|_regtest)$'

# Pattern that matches only complete whitespace, possibly across multiple lines.
# \s already covers newlines, so any all-whitespace string (including the
# empty string) matches.
_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL)
465
# Assertion macros.  These are defined in base/logging.h and
# testing/base/public/gunit.h.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE', 'ASSERT_TRUE',
    'EXPECT_FALSE', 'ASSERT_FALSE',
    ]

# _CHECK_REPLACEMENT[macro][operator] names the comparison macro that can
# replace "macro(a operator b)".
_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])

# Each row is (operator, suffix for the positive macros, suffix for the
# *_FALSE macros).  The *_FALSE macros assert the negation, hence the
# inverted comparison in the third column.
for op, replacement, inv_replacement in [('==', 'EQ', 'NE'),
                                         ('!=', 'NE', 'EQ'),
                                         ('>=', 'GE', 'LT'),
                                         ('>', 'GT', 'LE'),
                                         ('<=', 'LE', 'GT'),
                                         ('<', 'LT', 'GE')]:
  _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
  _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
490
# Alternative operator spellings mapped to the primary token they stand for.
# The complete list is in section 2.5 Alternative tokens [lex.digraph] of the
# C++ standard.
#
# Digraphs (such as '%:') are left out because matching them on a word
# boundary is a mess.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&', 'bitor': '|', 'or': '||', 'xor': '^',
    'compl': '~', 'bitand': '&',
    'and_eq': '&=', 'or_eq': '|=', 'xor_eq': '^=',
    'not': '!', 'not_eq': '!=',
    }

# One regular expression covering every keyword above.  The leading "[ =()]"
# is there to avoid matching the keywords outside of boolean expressions.
#
# C-style multi-line comments and multi-line strings can still produce false
# positives, but those have always been troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](%s)(?=[ (]|$)' % '|'.join(_ALT_TOKEN_REPLACEMENT))
517
518
# Header type constants, passed to _IncludeState.CheckNextIncludeOrder().
(_C_SYS_HEADER,
 _CPP_SYS_HEADER,
 _LIKELY_MY_HEADER,
 _POSSIBLE_MY_HEADER,
 _OTHER_HEADER) = range(1, 6)

# Inline assembly state constants.
(_NO_ASM,        # Outside of inline assembly block
 _INSIDE_ASM,    # Inside inline assembly block
 _END_ASM,       # Last line of inline assembly block
 _BLOCK_ASM,     # The whole block is an inline assembly block
 ) = range(4)
532
# Match start of assembly blocks: optional leading whitespace, one of the
# asm keywords, an optional volatile qualifier, then an opening '{' or '('.
_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
                        r'(?:\s+(volatile|__volatile__))?'
                        r'\s*[{(]')

# Match strings that indicate we're working on a C (not C++) file: either the
# LINT_C_FILE marker or a vi/vim modeline that sets filetype=c.  The value
# must be followed by whitespace, ':' or end of line, so that other file
# types that merely start with "c" (e.g. filetype=cpp) are not taken for C.
_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|'
                            r'vim?:\s*.*(\s*|:)filetype=c(\s+|:|$))')

# Match string that indicates we're working on a Linux Kernel file.
_SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)')
544
# Compiled regular expressions, keyed by pattern (filled by Match, ReplaceAll
# and Search).
_regexp_compile_cache = dict()

# {str, set(int)}: maps an error category to the set of line numbers on
# which errors of that category are expected and should be suppressed.
_error_suppressions = dict()

# Root directory used for deriving the header guard CPP variable
# (set by the --root flag).
_root = None
_root_debug = False

# Allowed line length of files (set by the --linelength flag).
_line_length = 80

# Allowed extensions for file names (set by the --extensions flag).
_valid_extensions = {'cc', 'h', 'cpp', 'cu', 'cuh'}

# Treat all headers starting with 'h' equally: .h, .hpp, .hxx etc.
# (set by the --headers flag).
_hpp_headers = {'h'}

# {str, bool}: maps an error category to a boolean telling whether the
# category should be suppressed for every line.
_global_error_suppressions = dict()
571
def ProcessHppHeadersOption(val):
  """Replaces the set of header extensions with those listed in val.

  Args:
    val: str, comma-separated header extensions (e.g. "hpp,hxx").
  """
  global _hpp_headers
  try:
    header_extensions = set(val.split(','))
    _hpp_headers = header_extensions
    # Header extensions are also valid file extensions, so they do not have
    # to be given a second time through the extensions option.
    _valid_extensions.update(header_extensions)
  except ValueError:
    PrintUsage('Header extensions must be comma separated list.')
580
def IsHeaderExtension(file_extension):
  """Tells whether an extension is in the current set of header extensions.

  Args:
    file_extension: the extension to look up; it is compared verbatim with
      the entries of the header-extension set (such as 'h').

  Returns:
    True if file_extension is one of the header extensions, else False.
  """
  known_header_extensions = _hpp_headers
  return file_extension in known_header_extensions
583
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records the suppression requested by a NOLINT comment, if any.

  Looks for NOLINT / NOLINTNEXTLINE, optionally followed by "(category)",
  on the given line and updates the global error_suppressions store.  An
  unknown category is reported through the error handler.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  nolint = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line)
  if not nolint:
    return

  # NOLINTNEXTLINE applies to the line after the comment.
  target_line = linenum + 1 if nolint.group(1) else linenum

  category = nolint.group(2)
  if category in (None, '(*)'):
    # No category, or the wildcard: suppress everything on the target line.
    _error_suppressions.setdefault(None, set()).add(target_line)
    return

  if not (category.startswith('(') and category.endswith(')')):
    return

  category = category[1:-1]  # drop the surrounding parentheses
  if category in _ERROR_CATEGORIES:
    _error_suppressions.setdefault(category, set()).add(target_line)
  elif category not in _LEGACY_ERROR_CATEGORIES:
    # Legacy categories are accepted silently; anything else is reported.
    error(filename, linenum, 'readability/nolint', 5,
          'Unknown NOLINT error category: %s' % category)
614
615
def ProcessGlobalSuppresions(lines):
  """Turns on file-wide suppressions requested by lint directives.

  Every line is searched for the C-file and Linux-Kernel-file markers; when
  one is found, the default suppressed categories for that kind of file are
  marked as globally suppressed.

  Args:
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
  """
  directive_table = (
      (_SEARCH_C_FILE, _DEFAULT_C_SUPPRESSED_CATEGORIES),
      (_SEARCH_KERNEL_FILE, _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES),
      )
  for text in lines:
    for marker, categories in directive_table:
      if marker.search(text):
        _global_error_suppressions.update(dict.fromkeys(categories, True))
632
633
def ResetNolintSuppressions():
  """Empties both the per-line and the global suppression stores."""
  for store in (_error_suppressions, _global_error_suppressions):
    store.clear()
638
639
def IsErrorSuppressedByNolint(category, linenum):
  """Tells whether errors of a category are suppressed on a given line.

  The answer comes from the global suppression maps maintained by
  ParseNolintSuppressions/ProcessGlobalSuppresions/ResetNolintSuppressions.

  Args:
    category: str, the category of the error.
    linenum: int, the current line number.
  Returns:
    bool, True iff the error should be suppressed due to a NOLINT comment or
    global suppression.
  """
  # A file-wide suppression of the category wins regardless of the line.
  globally_suppressed = _global_error_suppressions.get(category, False)
  if globally_suppressed:
    return globally_suppressed
  # A NOLINT(category) comment that targets this line.
  if linenum in _error_suppressions.get(category, set()):
    return True
  # A bare NOLINT / NOLINT(*) that targets this line (stored under None).
  return linenum in _error_suppressions.get(None, set())
656
657
def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: regex pattern
    s: string to match; matching is anchored at the start of the string

  Returns:
    The match object, or None if the pattern does not match.
  """
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  if pattern not in _regexp_compile_cache:
    # Use the public re.compile rather than the internal sre_compile module,
    # which is deprecated in current Python releases.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)
666
667
def ReplaceAll(pattern, rep, s):
  """Replaces instances of pattern in a string with a replacement.

  The compiled regex is kept in a cache shared by Match and Search.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  if pattern not in _regexp_compile_cache:
    # Use the public re.compile rather than the internal sre_compile module,
    # which is deprecated in current Python releases.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].sub(rep, s)
684
685
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: regex pattern
    s: string to search; the pattern may match anywhere in it

  Returns:
    The match object for the first match, or None if there is none.
  """
  if pattern not in _regexp_compile_cache:
    # Use the public re.compile rather than the internal sre_compile module,
    # which is deprecated in current Python releases.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
691
692
def _IsSourceExtension(s):
  """Returns True when s (an extension without the dot) names a source file."""
  source_extensions = ('c', 'cc', 'cpp', 'cxx')
  return s in source_extensions
696
697
class _IncludeState(object):
  """Remembers the includes seen so far and the section they put us in.

  include_list is a list of lists of (header, line number) pairs.  Keeping
  one inner list per preprocessor region, instead of one flat list, makes
  it easier to update across preprocessor boundaries.

  CheckNextIncludeOrder() is meant to be called once per header in the file
  with one of the header type constants defined above.  A header that shows
  up in an illegal position is reported through the returned error message.

  """
  # self._section only ever grows through these values.  A header that would
  # require stepping back makes CheckNextIncludeOrder return an error message.
  (_INITIAL_SECTION,
   _MY_H_SECTION,
   _C_SECTION,
   _CPP_SECTION,
   _OTHER_H_SECTION) = range(5)

  # Human-readable header types, used in the "Found X after Y" message.
  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  # Human-readable section names, used in the same message.
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    # Start with a single, empty include group and a fresh section state.
    self.include_list = [[]]
    self.ResetSection('')

  def FindHeader(self, header):
    """Looks up an earlier include of the given header.

    Args:
      header: header to check.
    Returns:
      Line number of previous occurrence, or -1 if the header has not
      been seen before.
    """
    earlier_lines = (entry[1]
                     for group in self.include_list
                     for entry in group
                     if entry[0] == header)
    return next(earlier_lines, -1)

  def ResetSection(self, directive):
    """Starts section tracking over at a preprocessor directive.

    Args:
      directive: preprocessor directive (e.g. "if", "else").
    """
    # Back to the initial section, with no previous header to compare to.
    self._section = self._INITIAL_SECTION
    self._last_header = ''

    # Nothing is ever popped from include_list: a conditional opens a new
    # group, and an alternative branch replaces the most recent group.
    opens_conditional = directive in ('if', 'ifdef', 'ifndef')
    if opens_conditional:
      self.include_list.append([])
    elif directive in ('else', 'elif'):
      self.include_list[-1] = []

  def SetLastHeader(self, header_path):
    """Remembers header_path as the most recently seen header."""
    self._last_header = header_path

  def CanonicalizeAlphabeticalOrder(self, header_path):
    """Normalizes a header path so that paths compare alphabetically.

    - '-inl.h' becomes '.h', since -inl headers are not required to come
      after the main header.
    - "-" becomes "_" so that both compare the same.
    - everything is lowercased, just in case.

    Args:
      header_path: Path to be canonicalized.

    Returns:
      Canonicalized path.
    """
    without_inl = header_path.replace('-inl.h', '.h')
    underscored = without_inl.replace('-', '_')
    return underscored.lower()

  def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
    """Tells whether a header sorts correctly after the previous header.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      header_path: Canonicalized header to be checked.

    Returns:
      Returns true if the header is in alphabetical order.
    """
    # _last_header is reset to the empty string whenever the section changes,
    # so the first header of a section is never considered out of order.
    sorts_before_previous = self._last_header > header_path
    if not sorts_before_previous:
      return True
    # Out of order only counts when the preceding line is an include too; if
    # it is anything else (e.g. a blank line), the ordering is assumed to be
    # intentional.
    preceding_line = clean_lines.elided[linenum - 1]
    return not Match(r'^\s*#\s*include\b', preceding_line)

  def CheckNextIncludeOrder(self, header_type):
    """Advances the section state for the next header and checks its order.

    Besides checking, this updates the internal state so that it is ready
    for the following include.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      The empty string if the header is in the right order, or an
      error message describing what's wrong.

    """
    # Built up front, while self._section still names the section we were in.
    error_message = ('Found %s after %s' %
                     (self._TYPE_NAMES[header_type],
                      self._SECTION_NAMES[self._section]))

    section_before = self._section

    if header_type in (_C_SYS_HEADER, _CPP_SYS_HEADER):
      if header_type == _C_SYS_HEADER:
        wanted_section = self._C_SECTION
      else:
        wanted_section = self._CPP_SECTION
      if self._section > wanted_section:
        # A system header after a later section is an ordering error.
        self._last_header = ''
        return error_message
      self._section = wanted_section
    elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
      if self._section <= self._MY_H_SECTION:
        self._section = self._MY_H_SECTION
      else:
        # Past the "my header" section such a header is simply treated as
        # an "other" header (for a possible-my-header we are not sure enough
        # that it is associated with this file anyway).
        self._section = self._OTHER_H_SECTION
    else:
      assert header_type == _OTHER_HEADER
      self._section = self._OTHER_H_SECTION

    # Alphabetical comparison starts over in every new section.
    if section_before != self._section:
      self._last_header = ''

    return ''
861
862
863class _CppLintState(object):
  """Maintains module-wide state."""
865
866  def __init__(self):
867    self.verbose_level = 1  # global setting.
868    self.error_count = 0    # global count of reported errors
869    # filters to apply when emitting error messages
870    self.filters = _DEFAULT_FILTERS[:]
871    # backup of filter list. Used to restore the state after each file.
872    self._filters_backup = self.filters[:]
873    self.counting = 'total'  # In what way are we counting errors?
874    self.errors_by_category = {}  # string to int dict storing error counts
875    self.quiet = False  # Suppress non-error messagess?
876
877    # output format:
878    # "emacs" - format that emacs can parse (default)
879    # "vs7" - format that Microsoft Visual Studio 7 can parse
880    self.output_format = 'emacs'
881
882  def SetOutputFormat(self, output_format):
883    """Sets the output format for errors."""
884    self.output_format = output_format
885
886  def SetQuiet(self, quiet):
887    """Sets the module's quiet settings, and returns the previous setting."""
888    last_quiet = self.quiet
889    self.quiet = quiet
890    return last_quiet
891
892  def SetVerboseLevel(self, level):
893    """Sets the module's verbosity, and returns the previous setting."""
894    last_verbose_level = self.verbose_level
895    self.verbose_level = level
896    return last_verbose_level
897
898  def SetCountingStyle(self, counting_style):
899    """Sets the module's counting options."""
900    self.counting = counting_style
901
902  def SetFilters(self, filters):
903    """Sets the error-message filters.
904
905    These filters are applied when deciding whether to emit a given
906    error message.
907
908    Args:
909      filters: A string of comma-separated filters (eg "+whitespace/indent").
910               Each filter should start with + or -; else we die.
911
912    Raises:
913      ValueError: The comma-separated filters did not all start with '+' or '-'.
914                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
915    """
916    # Default filters always have less priority than the flag ones.
917    self.filters = _DEFAULT_FILTERS[:]
918    self.AddFilters(filters)
919
920  def AddFilters(self, filters):
921    """ Adds more filters to the existing list of error-message filters. """
922    for filt in filters.split(','):
923      clean_filt = filt.strip()
924      if clean_filt:
925        self.filters.append(clean_filt)
926    for filt in self.filters:
927      if not (filt.startswith('+') or filt.startswith('-')):
928        raise ValueError('Every filter in --filters must start with + or -'
929                         ' (%s does not)' % filt)
930
931  def BackupFilters(self):
932    """ Saves the current filter list to backup storage."""
933    self._filters_backup = self.filters[:]
934
935  def RestoreFilters(self):
936    """ Restores filters previously backed up."""
937    self.filters = self._filters_backup[:]
938
939  def ResetErrorCounts(self):
940    """Sets the module's error statistic back to zero."""
941    self.error_count = 0
942    self.errors_by_category = {}
943
944  def IncrementErrorCount(self, category):
945    """Bumps the module's error statistic."""
946    self.error_count += 1
947    if self.counting in ('toplevel', 'detailed'):
948      if self.counting != 'detailed':
949        category = category.split('/')[0]
950      if category not in self.errors_by_category:
951        self.errors_by_category[category] = 0
952      self.errors_by_category[category] += 1
953
954  def PrintErrorCounts(self):
955    """Print a summary of errors by category, and the total."""
956    for category, count in self.errors_by_category.iteritems():
957      sys.stderr.write('Category \'%s\' errors found: %d\n' %
958                       (category, count))
959    sys.stdout.write('Total errors found: %d\n' % self.error_count)
960
# The single shared _CppLintState instance; the module-level helper functions
# that follow read and update it.
_cpplint_state = _CppLintState()
962
963
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format value currently stored on the shared state object.
  """
  return _cpplint_state.output_format
967
968
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The format name to store on the shared state object.
  """
  _cpplint_state.SetOutputFormat(output_format)
972
def _Quiet():
  """Returns the module's quiet setting."""
  return _cpplint_state.quiet
976
def _SetQuiet(quiet):
  """Sets the module's quiet status, and returns the previous setting.

  Args:
    quiet: The new value for the quiet flag.

  Returns:
    The value the quiet flag had before this call.
  """
  return _cpplint_state.SetQuiet(quiet)
980
981
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level value currently stored on the shared state object.
  """
  return _cpplint_state.verbose_level
985
986
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity level.

  Returns:
    The verbosity level that was in effect before this call.
  """
  return _cpplint_state.SetVerboseLevel(level)
990
991
def _SetCountingStyle(level):
  """Sets the module's counting options.

  Args:
    level: The counting style to store.  Despite the parameter name this is
      a style string: _CppLintState.IncrementErrorCount keeps per-category
      counts only for 'toplevel' and 'detailed'.
  """
  _cpplint_state.SetCountingStyle(level)
995
996
def _Filters():
  """Returns the module's list of output filters, as a list.

  The list object held by the shared state is returned directly, not a copy.
  """
  return _cpplint_state.filters
1000
1001
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.

  Raises:
    ValueError: A filter does not start with '+' or '-'.
  """
  _cpplint_state.SetFilters(filters)
1013
def _AddFilters(filters):
  """Adds more filter overrides.

  Unlike _SetFilters, this function does not reset the current list of filters
  available.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.

  Raises:
    ValueError: A filter does not start with '+' or '-'.
  """
  _cpplint_state.AddFilters(filters)
1025
def _BackupFilters():
  """Saves the current filter list to backup storage.

  Only one backup is kept; a later call replaces the earlier one.
  """
  _cpplint_state.BackupFilters()
1029
def _RestoreFilters():
  """Restores the filters previously saved by _BackupFilters."""
  _cpplint_state.RestoreFilters()
1033
class _FunctionState(object):
  """Remembers which function is being scanned and how long its body is."""

  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
  _TEST_TRIGGER = 400    # larger allowance used for TEST/Test functions.

  def __init__(self):
    self.current_function = ''
    self.lines_in_function = 0
    self.in_a_function = False

  def Begin(self, function_name):
    """Starts tracking a new function body and resets the line counter.

    Args:
      function_name: The name of the function being tracked.
    """
    self.current_function = function_name
    self.lines_in_function = 0
    self.in_a_function = True

  def Count(self):
    """Adds one line to the current function body, if inside a function."""
    if not self.in_a_function:
      return
    self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Reports a function body that has grown past its size trigger.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    if not self.in_a_function:
      return

    is_test = Match(r'T(EST|est)', self.current_function)
    base_trigger = self._TEST_TRIGGER if is_test else self._NORMAL_TRIGGER
    # The threshold doubles with every verbosity level.
    trigger = base_trigger * 2**_VerboseLevel()
    if self.lines_in_function <= trigger:
      return

    # The level is the base-2 log of (line count / base trigger), truncated
    # to an int and capped at 5.
    error_level = min(
        5, int(math.log(self.lines_in_function / base_trigger, 2)))
    error(filename, linenum, 'readability/fn_size', error_level,
          'Small and focused functions are preferred:'
          ' %s has %d non-comment lines'
          ' (error triggered by exceeding %d lines).'  % (
              self.current_function, self.lines_in_function, trigger))

  def End(self):
    """Stops tracking the current function body."""
    self.in_a_function = False
1091
1092
class _IncludeError(Exception):
  """Raised to signal a problem with the include order in a file."""
1096
1097
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  def __init__(self, filename):
    self._filename = filename

  def FullName(self):
    """Returns the absolute path, with backslashes replaced by '/'."""
    return os.path.abspath(self._filename).replace('\\', '/')

  @staticmethod
  def _HasVcsMarker(directory):
    """Returns True if directory contains a .git, .hg or .svn entry."""
    return any(os.path.exists(os.path.join(directory, marker))
               for marker in (".git", ".hg", ".svn"))

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.

    Returns:
      The path relative to the detected checkout root, or FullName() when the
      file does not exist or no checkout root is found.
    """
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we recursively look
        # up the directory tree for the top of the SVN checkout
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        while os.path.exists(os.path.join(one_up_dir, ".svn")):
          root_dir = os.path.dirname(root_dir)
          one_up_dir = os.path.dirname(one_up_dir)

        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

      # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
      # searching up from the current path.  The walk does not stop at the
      # first hit, so the outermost marked directory wins.
      root_dir = current_dir = os.path.dirname(fullname)
      while current_dir != os.path.dirname(current_dir):
        if self._HasVcsMarker(current_dir):
          root_dir = current_dir
        current_dir = os.path.dirname(current_dir)

      if self._HasVcsMarker(root_dir):
        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """

    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Repository-relative path with the file extension removed."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """File has a source file extension."""
    return _IsSourceExtension(self.Extension()[1:])
1187
1188
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error should be reported.

  An error is dropped when a NOLINT comment suppresses it, when its
  confidence is below the current verbosity level, or when the last filter
  whose text is a prefix of the category is a '-' filter.

  Args:
    category: The error category, e.g. "whitespace/indent".
    confidence: The confidence score of the error.
    linenum: The number of the line the error was found on.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False
  if confidence < _cpplint_state.verbose_level:
    return False

  suppressed = False
  for one_filter in _Filters():
    sign = one_filter[:1]
    if sign not in ('-', '+'):
      assert False  # should have been checked for in SetFilter.
      continue
    # Later filters override earlier ones for the categories they match.
    if category.startswith(one_filter[1:]):
      suppressed = (sign == '-')

  return not suppressed
1215
1216
def Error(filename, linenum, category, confidence, message):
  """Records and prints a lint error, unless it is filtered out.

  Besides the location we also report our confidence in the error, that is,
  how certain we are this is a legitimate style regression, and not a
  misidentification or a use that's sometimes justified.

  False positives can be suppressed with "NOLINT(category)" comments in the
  source; that check, the verbosity check and the filter check are all
  delegated to _ShouldPrintError.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime".  Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if not _ShouldPrintError(category, confidence, linenum):
    return

  _cpplint_state.IncrementErrorCount(category)
  output_format = _cpplint_state.output_format
  if output_format == 'vs7':
    text = '%s(%s): error cpplint: [%s] %s [%d]\n' % (
        filename, linenum, category, message, confidence)
  elif output_format == 'eclipse':
    text = '%s:%s: warning: %s  [%s] [%d]\n' % (
        filename, linenum, message, category, confidence)
  else:
    text = '%s:%s:  %s  [%s] [%d]\n' % (
        filename, linenum, message, category, confidence)
  sys.stderr.write(text)
1250
1251
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
# a backslash followed by a simple escape character, a run of digits, or
# 'x' plus hex digits.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches a single C-style /* ... */ comment contained on one line.
_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/'
# Matches a single-line C-style comment together with the whitespace that
# should be removed along with it.
# This RE is a little more complicated than one might expect, because it
# also has to decide which surrounding spaces to remove, so that comments
# inside statements are handled well.
# The alternatives are tried in this order:
#   1. a comment at the end of the line: spaces on both sides are removed;
#   2. a comment followed by spaces: the spaces on the right are removed;
#   3. a comment preceded by spaces and followed by a non-word character:
#      the spaces on the left are removed;
#   4. otherwise only the comment itself is removed.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' +
    _RE_PATTERN_C_COMMENTS + r'\s+|' +
    r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' +
    _RE_PATTERN_C_COMMENTS + r')')
1270
1271
def IsCppString(line):
  """Tells whether text appended to line would fall inside a string constant.

  Single-line and multi-line comments are not taken into account.

  Args:
    line: is a partial line of code starting from the 0..n.

  Returns:
    True, if next character appended to 'line' is inside a
    string constant.
  """
  # Neutralize escaped backslashes so that \\" is not mistaken for \".
  text = line.replace(r'\\', 'XX')
  all_quotes = text.count('"')
  escaped_quotes = text.count(r'\"')
  char_literal_quotes = text.count("'\"'")
  # An odd number of remaining quotes means a string is still open.
  open_quotes = all_quotes - escaped_quotes - char_literal_quotes
  return open_quotes % 2 == 1
1287
1288
def CleanseRawStrings(raw_lines):
  """Removes C++11 raw strings from lines.

    Before:
      static const char kData[] = R"(
          multi-line string
          )";

    After:
      static const char kData[] = ""
      ""    (each interior line becomes an empty string literal)
          "";

  Args:
    raw_lines: list of raw lines.

  Returns:
    list of lines with C++11 raw strings replaced by empty strings.  The
    result has one entry per input line.
  """

  # Closing marker of the raw string we are currently inside, or None when
  # we are not inside one.  It carries over from one line to the next.
  delimiter = None
  lines_without_raw_strings = []
  for line in raw_lines:
    if delimiter:
      # Inside a raw string, look for the end
      end = line.find(delimiter)
      if end >= 0:
        # Found the end of the string, match leading space for this
        # line and resume copying the original lines, and also insert
        # a "" on the last line.
        leading_space = Match(r'^(\s*)\S', line)
        line = leading_space.group(1) + '""' + line[end + len(delimiter):]
        delimiter = None
      else:
        # Haven't found the end yet, replace the whole line with an empty
        # string literal.
        line = '""'

    # Look for beginning of a raw string, and replace them with
    # empty strings.  This is done in a loop to handle multiple raw
    # strings on the same line.
    while delimiter is None:
      # Look for beginning of a raw string.
      # See 2.14.15 [lex.string] for syntax.
      #
      # Once we have matched a raw string, we check the prefix of the
      # line to make sure that the line is not part of a single line
      # comment.  It's done this way because we remove raw strings
      # before removing comments as opposed to removing comments
      # before removing raw strings.  This is because there are some
      # cpplint checks that requires the comments to be preserved, but
      # we don't want to check comments that are inside raw strings.
      #
      # Groups: (1) text before the raw string prefix, (2) the custom
      # delimiter between the quote and '(', (3) the rest of the line.
      matched = Match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
      if (matched and
          not Match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//',
                    matched.group(1))):
        # The raw string ends at ')' + delimiter + '"'.
        delimiter = ')' + matched.group(2) + '"'

        end = matched.group(3).find(delimiter)
        if end >= 0:
          # Raw string ended on same line
          line = (matched.group(1) + '""' +
                  matched.group(3)[end + len(delimiter):])
          delimiter = None
        else:
          # Start of a multi-line raw string
          line = matched.group(1) + '""'
      else:
        break

    lines_without_raw_strings.append(line)

  # TODO(unknown): if delimiter is not None here, we might want to
  # emit a warning for unterminated string.
  return lines_without_raw_strings
1363
1364
def FindNextMultiLineCommentStart(lines, lineix):
  """Finds the next line that opens a comment continuing past that line.

  Args:
    lines: The lines to search.
    lineix: The index at which to start searching.

  Returns:
    The index of the first line at or after lineix that, once stripped,
    starts with '/*' and has no '*/' after it; len(lines) if there is none.
  """
  total = len(lines)
  for index in range(lineix, total):
    stripped = lines[index].strip()
    # A comment that is also closed on this line is not a multi-line one.
    if stripped.startswith('/*') and '*/' not in stripped[2:]:
      return index
  return total
1374
1375
def FindNextMultiLineCommentEnd(lines, lineix):
  """Finds the line that closes the comment we are currently inside.

  Args:
    lines: The lines to search.
    lineix: The index at which to start searching.

  Returns:
    The index of the first line at or after lineix that, once stripped,
    ends with '*/'; len(lines) if there is none.
  """
  total = len(lines)
  for index in range(lineix, total):
    if lines[index].strip().endswith('*/'):
      return index
  return total
1383
1384
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with empty comment markers.

  Args:
    lines: The list of lines to modify.
    begin: Index of the first line to overwrite.
    end: Index one past the last line to overwrite.
  """
  # An empty /**/ comment keeps each line non-empty, so we will not get
  # unnecessary blank line warnings later in the code.
  placeholder = '/**/'
  index = begin
  while index < end:
    lines[index] = placeholder
    index += 1
1391
1392
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multi-line (C-style) comment in lines, in place.

  Args:
    filename: The name of the current file, used when reporting errors.
    lines: The list of lines to modify.
    error: The function to call with any errors found.
  """
  search_from = 0
  while search_from < len(lines):
    first = FindNextMultiLineCommentStart(lines, search_from)
    if first >= len(lines):
      # No further multi-line comment.
      break
    last = FindNextMultiLineCommentEnd(lines, first)
    if last >= len(lines):
      error(filename, first + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      break
    RemoveMultiLineCommentsFromRange(lines, first, last + 1)
    search_from = last + 1
1407
1408
def CleanseComments(line):
  """Strips a trailing //-comment and any single-line /* */ comments.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  slash_pos = line.find('//')
  if slash_pos >= 0:
    # Only the first '//' is considered, and only if it is outside a string.
    if not IsCppString(line[:slash_pos]):
      line = line[:slash_pos].rstrip()
  # Finally drop /* ... */ comments that start and end on this line.
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
1423
1424
class CleansedLines(object):
  """Keeps four parallel views of a file's lines, each cleaned differently.

  1) elided member contains lines without strings and comments.
  2) lines member contains lines without comments.
  3) raw_lines member contains all the lines without processing.
  4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw
     strings removed.
  All these members are lists of the same length.
  """

  def __init__(self, lines):
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    for text in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(text))
      self.elided.append(CleanseComments(self._CollapseStrings(text)))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Reduces string and char literals on a line to plain "" or ''.

    Strings are collapsed before comments are removed so that text like
    '"http://"' is not mistaken for a comment.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    # #include lines are returned untouched.
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided

    # Drop escape sequences first so that quote matching stays simple.
    # Things that look like escaped characters shouldn't occur outside of
    # strings and chars.
    remaining = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)

    # Single and double quotes are handled in the same loop, otherwise
    # nested quotes wouldn't work.
    pieces = []
    while True:
      # Split at the first quote character of either kind.
      match = Match(r'^([^\'"]*)([\'"])(.*)$', remaining)
      if not match:
        pieces.append(remaining)
        break
      head, quote, tail = match.groups()

      if quote == '"':
        closing = tail.find('"')
        if closing < 0:
          # Unmatched double quote: probably a multiline string, so the
          # rest of the line is kept as it is.
          pieces.append(remaining)
          break
        pieces.append(head + '""')
        remaining = tail[closing + 1:]
        continue

      # Single quote.  If the text before it ends in a numeric literal the
      # quote is a digit separator, not the start of a char literal.
      #
      # No special handling is needed for floating point: the integer,
      # fractional and exponent parts are all parsed correctly as long as
      # there are digits on both sides of the separator, so only something
      # like "0.'3" would confuse us (gcc 4.9.0 rejects that literal).
      if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
        literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
        pieces.append(head + literal.group(1).replace("'", ''))
        remaining = literal.group(2)
        continue

      closing = tail.find('\'')
      if closing < 0:
        # Unmatched single quote
        pieces.append(remaining)
        break
      pieces.append(head + "''")
      remaining = tail[closing + 1:]

    return ''.join(pieces)
1518
1519
def FindEndOfExpressionInLine(line, startpos, stack):
  """Find the position just after the end of current parenthesized expression.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    stack: nesting stack at startpos.  It is modified in place.

  Returns:
    On finding matching end: (index just after matching end, None)
    On finding a mismatched or unopened closer, or a point where the stack
      empties without a match: (-1, None)
    Otherwise: (-1, new stack at end of this line)
  """
  # range() instead of xrange() so this runs on both Python 2 and Python 3.
  for i in range(startpos, len(line)):
    char = line[i]
    if char in '([{':
      # Found start of parenthesized expression, push to expression stack
      stack.append(char)
    elif char == '<':
      # Found potential start of template argument list
      if i > 0 and line[i - 1] == '<':
        # Left shift operator: the previous '<' was pushed as a tentative
        # template start, so undo that.
        if stack and stack[-1] == '<':
          stack.pop()
          if not stack:
            return (-1, None)
      elif i > 0 and Search(r'\boperator\s*$', line[0:i]):
        # operator<, don't add to stack
        continue
      else:
        # Tentative start of template argument list
        stack.append('<')
    elif char in ')]}':
      # Found end of parenthesized expression.
      #
      # If we are currently expecting a matching '>', the pending '<'
      # must have been an operator.  Remove them from expression stack.
      while stack and stack[-1] == '<':
        stack.pop()
      if not stack:
        return (-1, None)
      if ((stack[-1] == '(' and char == ')') or
          (stack[-1] == '[' and char == ']') or
          (stack[-1] == '{' and char == '}')):
        stack.pop()
        if not stack:
          return (i + 1, None)
      else:
        # Mismatched parentheses
        return (-1, None)
    elif char == '>':
      # Found potential end of template argument list.

      # Ignore "->" and operator functions
      # NOTE(review): this slices to i - 1, whereas the '<' branch above
      # uses line[0:i] -- confirm the difference is intended.
      if (i > 0 and
          (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))):
        continue

      # Pop the stack if there is a matching '<'.  Otherwise, ignore
      # this '>' since it must be an operator.
      if stack:
        if stack[-1] == '<':
          stack.pop()
          if not stack:
            return (i + 1, None)
    elif char == ';':
      # Found something that look like end of statements.  If we are currently
      # expecting a '>', the matching '<' must have been an operator, since
      # template argument list should not contain statements.
      while stack and stack[-1] == '<':
        stack.pop()
      if not stack:
        return (-1, None)

  # Did not find end of expression or unbalanced parentheses on this line
  return (-1, stack)
1596
1597
def CloseExpression(clean_lines, linenum, pos):
  """Locates the closer that matches an opening ( or { or [ or <.

  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
  linenum/pos that correspond to the closing of the expression.

  TODO(unknown): cpplint spends a fair bit of time matching parentheses.
  Ideally we would want to index all opening and closing parentheses once
  and have CloseExpression be just a simple lookup, but due to preprocessor
  tricks, this is not so easy.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]
  opens_expression = line[pos] in '({[<'
  # '<<' and '<=' are operators, not the start of a template argument list.
  if not opens_expression or Match(r'<[<=]', line[pos:]):
    return (line, clean_lines.NumLines(), -1)

  # Scan the starting line first, then continue forward one line at a time
  # for as long as there is an open nesting stack and lines remain.
  end_pos, stack = FindEndOfExpressionInLine(line, pos, [])
  current = linenum
  while True:
    if end_pos > -1:
      return (line, current, end_pos)
    if not stack or current >= clean_lines.NumLines() - 1:
      break
    current += 1
    line = clean_lines.elided[current]
    end_pos, stack = FindEndOfExpressionInLine(line, 0, stack)

  # Did not find end of expression before end of file, give up
  return (line, clean_lines.NumLines(), -1)
1640
1641
def FindStartOfExpressionInLine(line, endpos, stack):
  """Find position at the matching start of current expression.

  This is almost the reverse of FindEndOfExpressionInLine, but note
  that the input position and returned position differs by 1.  The line is
  scanned backwards from endpos down to index 0.

  Args:
    line: a CleansedLines line.
    endpos: start searching at this position.
    stack: nesting stack at endpos.  It is modified in place.

  Returns:
    On finding matching start: (index at matching start, None)
    On finding a mismatched or unclosed opener, or a ';' with nothing
      left on the stack: (-1, None)
    Otherwise: (-1, new stack at beginning of this line)
  """
  i = endpos
  while i >= 0:
    char = line[i]
    if char in ')]}':
      # Found end of expression, push to expression stack
      stack.append(char)
    elif char == '>':
      # Found potential end of template argument list.
      #
      # Ignore it if it's a "->" or ">=" or "operator>"
      if (i > 0 and
          (line[i - 1] == '-' or
           Match(r'\s>=\s', line[i - 1:]) or
           Search(r'\boperator\s*$', line[0:i]))):
        # Together with the decrement at the bottom of the loop this also
        # skips the character before the '>'.
        i -= 1
      else:
        stack.append('>')
    elif char == '<':
      # Found potential start of template argument list
      if i > 0 and line[i - 1] == '<':
        # Left shift operator; skip its first '<' as well.
        i -= 1
      else:
        # If there is a matching '>', we can pop the expression stack.
        # Otherwise, ignore this '<' since it must be an operator.
        if stack and stack[-1] == '>':
          stack.pop()
          if not stack:
            return (i, None)
    elif char in '([{':
      # Found start of expression.
      #
      # If there are any unmatched '>' on the stack, they must be
      # operators.  Remove those.
      while stack and stack[-1] == '>':
        stack.pop()
      if not stack:
        return (-1, None)
      if ((char == '(' and stack[-1] == ')') or
          (char == '[' and stack[-1] == ']') or
          (char == '{' and stack[-1] == '}')):
        stack.pop()
        if not stack:
          return (i, None)
      else:
        # Mismatched parentheses
        return (-1, None)
    elif char == ';':
      # Found something that look like end of statements.  If we are currently
      # expecting a '<', the matching '>' must have been an operator, since
      # template argument list should not contain statements.
      while stack and stack[-1] == '>':
        stack.pop()
      if not stack:
        return (-1, None)

    i -= 1

  # Reached the beginning of the line with the expression still open.
  return (-1, stack)
1717
1718
def ReverseCloseExpression(clean_lines, linenum, pos):
  """Locates the opener that matches a closing ) or } or ] or >.

  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
  linenum/pos that correspond to the opening of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *at* the opening brace, or
    (line, 0, -1) if we never find the matching opening brace.  Note
    we ignore strings and comments when matching; and the line we
    return is the 'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]
  if line[pos] not in ')}]>':
    return (line, 0, -1)

  # Scan the starting line first, then continue backward one line at a time
  # for as long as there is an open nesting stack and lines remain.
  start_pos, stack = FindStartOfExpressionInLine(line, pos, [])
  current = linenum
  while True:
    if start_pos > -1:
      return (line, current, start_pos)
    if not stack or current <= 0:
      break
    current -= 1
    line = clean_lines.elided[current]
    start_pos, stack = FindStartOfExpressionInLine(
        line, len(line) - 1, stack)

  # Did not find start of expression before beginning of file, give up
  return (line, 0, -1)
1755
1756
def CheckForCopyright(filename, lines, error):
  """Reports a missing copyright notice near the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # lines[0] is a placeholder, so real content starts at index 1.  The
  # notice has to show up within indices 1..10.
  top_of_file = lines[1:11]
  if any(re.search(r'Copyright', text, re.I) for text in top_of_file):
    return

  error(filename, 0, 'legal/copyright', 5,
        'No copyright message found.  '
        'You should have a line: "Copyright [year] <Copyright Owner>"')
1768
1769
def GetIndentLevel(line):
  """Counts the space characters that indent the given line.

  Only a run of spaces immediately followed by a non-whitespace
  character is counted.  A line that is empty, all blank, or whose
  leading spaces run into other whitespace (e.g. a tab) yields 0.

  Args:
    line: A string to check.

  Returns:
    An integer count of leading spaces, possibly zero.
  """
  leading = Match(r'^( *)\S', line)
  return len(leading.group(1)) if leading else 0
1784
def PathSplitToList(path):
  """Breaks a path into its components using os.path.split.

  Args:
    path: An absolute or relative path (e.g. '/a/b/c/' or '../a')

  Returns:
    A list of path components in left-to-right order, e.g.
    'a/b/c' -> ['a', 'b', 'c'].  For an absolute path the first element
    is whatever os.path.split leaves as the irreducible head (the root),
    and a trailing separator produces a trailing '' element.
  """
  # Components are collected right-to-left and flipped at the end.
  collected = []
  remaining = path
  while True:
    (parent, leaf) = os.path.split(remaining)
    if parent == remaining:
      # Splitting no longer shrinks the head: end of an absolute path.
      collected.append(parent)
      break
    if leaf == remaining:
      # Nothing but a single name is left: end of a relative path.
      collected.append(leaf)
      break
    collected.append(leaf)
    remaining = parent

  return collected[::-1]
1809
def GetHeaderGuardCPPVariable(filename):
  """Computes the header-guard macro name expected for a header file.

  The name is derived from the file's repository-relative path, adjusted
  by the --root setting (_root) when one is given: every character that
  is not an ASCII letter or digit becomes '_', the result is upper-cased
  and a trailing '_' is appended.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.
  """
  # Undo the renaming applied by Emacs's flymake so the guard is derived
  # from the original filename, then spell out 'c++' as 'cpp'.
  filename = re.sub(r'_flymake\.h$', '.h', filename)
  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
  filename = filename.replace('C++', 'cpp').replace('c++', 'cpp')

  fileinfo = FileInfo(filename)
  repository_path = fileinfo.RepositoryName()

  def RemoveLeadingComponents(components, leading):
    """Returns components minus its prefix `leading`, or None if absent."""
    count = len(leading)
    if components[:count] == leading:
      # e.g. (['a', 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd']
      return components[count:]
    # e.g. (['x', 'y'], ['w', 'z']) -> None  (not a valid prefix)
    return None

  def ApplyRootFlag():
    """Returns repository_path adjusted for --root, if it applies."""
    if _root_debug:
      sys.stderr.write("\n_root fixup, _root = '%s', repository name = '%s'\n"
                       % (_root, fileinfo.RepositoryName()))

    if not _root:
      if _root_debug:
        sys.stderr.write("_root unspecified\n")
      return repository_path

    # --root=subdir: strip the leading subdir from the header guard.
    stripped = RemoveLeadingComponents(PathSplitToList(repository_path),
                                       PathSplitToList(_root))
    if _root_debug:
      sys.stderr.write(
          "_root lstrip (maybe_path=%s, file_path_from_root=%s, _root=%s)\n"
          % (stripped, repository_path, _root))
    if stripped:
      return os.path.join(*stripped)

    # --root=..: the outer directory gets prepended to the header guard.
    # This is done by stripping the absolute root from the full path.
    full_path = fileinfo.FullName()
    root_abspath = os.path.abspath(_root)
    stripped = RemoveLeadingComponents(PathSplitToList(full_path),
                                       PathSplitToList(root_abspath))
    if _root_debug:
      sys.stderr.write(
          "_root prepend (maybe_path=%s, full_path=%s, root_abspath=%s)\n"
          % (stripped, full_path, root_abspath))
    if stripped:
      return os.path.join(*stripped)

    # --root=FAKE_DIR (matches neither way) is ignored.
    if _root_debug:
      sys.stderr.write("_root ignore, returning %s\n" % repository_path)
    return repository_path

  guard_path = ApplyRootFlag()
  return re.sub(r'[^a-zA-Z0-9]', '_', guard_path).upper() + '_'
1884
1885
def CheckForHeaderGuard(filename, clean_lines, error):
  """Verifies the #ifndef / #define / #endif guard of a header file.

  Reports a missing guard, a guard whose name differs from the one
  suggested by GetHeaderGuardCPPVariable(), and a closing #endif that is
  not followed by a comment naming the guard.

  Args:
    filename: The name of the C++ header file.
    clean_lines: A CleansedLines instance containing the file.
    error: The function to call with any errors found.
  """
  raw_lines = clean_lines.lines_without_raw_strings

  # A NOLINT(build/header_guard) comment anywhere in the file switches
  # the whole check off.  Because the warning may be attached to a
  # nonexistent line, only this very specific spelling is supported, not
  # the general NOLINT or NOLINT(*) syntax.
  if any(Search(r'//\s*NOLINT\(build/header_guard\)', text)
         for text in raw_lines):
    return

  cppvar = GetHeaderGuardCPPVariable(filename)

  # Remember the first #ifndef, the first #define (argument only) and the
  # last line that starts with #endif (whole line).
  ifndef = ''
  ifndef_linenum = 0
  define = ''
  endif = ''
  endif_linenum = 0
  for linenum, line in enumerate(raw_lines):
    tokens = line.split()
    if len(tokens) >= 2:
      directive, argument = tokens[0], tokens[1]
      if directive == '#ifndef' and not ifndef:
        ifndef = argument
        ifndef_linenum = linenum
      if directive == '#define' and not define:
        define = argument
    if line.startswith('#endif'):
      endif = line
      endif_linenum = linenum

  # A guard needs both directives, and they must name the same symbol.
  if not (ifndef and define and ifndef == define):
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # PATH_FILE_H_ is the expected guard; PATH_FILE_H__ is still tolerated
  # for backward compatibility and only draws a level-0 report.
  if ifndef != cppvar:
    error_level = 0 if ifndef == cppvar + '_' else 5
    ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum,
                            error)
    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  # The #endif line should carry a "//" comment naming the guard.
  ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum,
                          error)
  match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif)
  if match:
    if match.group(1) == '_':
      # Issue low severity warning for deprecated double trailing underscore
      error(filename, endif_linenum, 'build/header_guard', 0,
            '#endif line should be "#endif  // %s"' % cppvar)
    return

  # No "//" comment on the #endif.  If no line other than the first and
  # the last contains a "//" comment outside of quotes, the file may be
  # restricted to "/**/" comments, so accept that form instead.
  has_slash_slash_comment = any(
      Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', text)
      for text in raw_lines[1:-1])

  if not has_slash_slash_comment:
    match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif)
    if match:
      if match.group(1) == '_':
        # Low severity warning for double trailing underscore
        error(filename, endif_linenum, 'build/header_guard', 0,
              '#endif line should be "#endif  /* %s */"' % cppvar)
      return

  # Neither comment form was found.
  error(filename, endif_linenum, 'build/header_guard', 5,
        '#endif line should be "#endif  // %s"' % cppvar)
1982
1983
def CheckHeaderFileIncluded(filename, include_state, error):
  """Logs an error if a .cc file does not include its header.

  Nothing is reported for test files or when no header with the same
  stem exists next to the file.

  Args:
    filename: The name of the current file.
    include_state: Object whose include_list holds, per section, entries
      whose first item is the included path and second item the line
      number of the include.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)

  # Do not check test files.
  if Search(_TEST_FILE_SUFFIX, fileinfo.BaseName()):
    return

  # Replace the file's extension with '.h' to get the candidate header.
  stem_length = len(filename) - len(fileinfo.Extension())
  headerfile = filename[:stem_length] + '.h'
  if not os.path.exists(headerfile):
    return
  headername = FileInfo(headerfile).RepositoryName()

  # Accept any include whose path contains, or is contained in, the
  # header's repository name.  Meanwhile remember the line of the first
  # include so the error can be attached to it.
  first_include = 0
  for section in include_state.include_list:
    for entry in section:
      included = entry[0]
      if headername in included or included in headername:
        return
      first_include = first_include or entry[1]

  error(filename, first_include, 'build/include', 5,
        '%s should include its header file %s' % (fileinfo.RepositoryName(),
                                                  headername))
2007
2008
def CheckForBadCharacters(filename, lines, error):
  """Reports every line that contains a bad character.

  Two kinds of characters are flagged:

  1. Unicode replacement characters.  Either the file contained invalid
  UTF-8 (likely) or it really contained replacement characters (which it
  shouldn't).  Line numbering may be thrown off if the invalid UTF-8 sat
  next to a newline.

  2. NUL bytes, which are problematic for some tools.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # (offending character, error category, message), in reporting order.
  bad_characters = (
      (u'\ufffd', 'readability/utf8',
       'Line contains invalid UTF-8 (or Unicode replacement character).'),
      ('\0', 'readability/nul', 'Line contains NUL byte.'),
  )
  for linenum, line in enumerate(lines):
    for bad_char, category, message in bad_characters:
      if bad_char in line:
        error(filename, linenum, category, 5, message)
2032
2033
def CheckForNewlineAtEOF(filename, lines, error):
  """Reports a file whose last line is not terminated by a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # lines was built by appending two newlines to the original file and
  # splitting on \n.  The file therefore ended in \n exactly when the
  # last-but-two element exists and is empty.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if ends_with_newline:
    return

  error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
        'Could not find a newline character at the end of the file.')
2050
2051
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Reports a /* ... */ comment or "..." string left open on a line.

  /* ... */ comments are legit inside macros, for one line.  Otherwise
  // comments are preferred, so warning about the rest is fine.
  Strings may likewise span several lines as long as each line ends in
  a line continuation character (backslash); the C++ style guide does
  not currently prohibit this, but it is ugly and unnecessary.  This
  lint program copes poorly with both, hence the warnings.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Escaped backslashes (\\) are harmless, but the second slash of such a
  # pair could be mistaken for the start of \" below, so drop them first.
  line = clean_lines.elided[linenum].replace('\\\\', '')

  # More comment openers than closers: a comment continues past the line.
  comment_left_open = line.count('/*') > line.count('*/')
  if comment_left_open:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings.  '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped double quotes: a string continues past
  # the line.
  unescaped_quotes = line.count('"') - line.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found.  This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings.  '
          'Use C++11 raw strings or concatenation instead.')
2088
2089
# _THREADING_LIST holds one triple per thread-unsafe function:
#   (non-threadsafe name, thread-safe alternative, validation pattern)
#
# The validation pattern weeds out false positives such as:
#  _rand();               // false positive due to substring match.
#  ->rand();              // some member function rand().
#  ACMRandom rand(seed);  // some variable named rand.
#  ISAACRandom rand();    // another variable named rand.
#
# In essence the return value has to be used in some expression context
# on the same line, which is detected by requiring an operator right
# before the function name (_UNSAFE_FUNC_PREFIX).  Constructors and
# member function calls are thereby excluded.
_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)'
_THREADING_LIST = tuple(
    (func + '(', func + '_r(', _UNSAFE_FUNC_PREFIX + func + call_args)
    # Each row: function name, then the pattern for its argument list --
    # r'\([^)]+\)' for calls with arguments, r'\(\)' for calls without.
    for func, call_args in (
        ('asctime', r'\([^)]+\)'),
        ('ctime', r'\([^)]+\)'),
        ('getgrgid', r'\([^)]+\)'),
        ('getgrnam', r'\([^)]+\)'),
        ('getlogin', r'\(\)'),
        ('getpwnam', r'\([^)]+\)'),
        ('getpwuid', r'\([^)]+\)'),
        ('gmtime', r'\([^)]+\)'),
        ('localtime', r'\([^)]+\)'),
        ('rand', r'\(\)'),
        ('strtok', r'\([^)]+\)'),
        ('ttyname', r'\([^)]+\)'),
    ))
2118
2119
def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Flags calls to thread-unsafe posix functions.

  A lot of code was written without multi-threading in mind, and many
  engineers learned posix before its threading extensions existed.
  These checks point them to the thread-safe variants (when posix is
  used directly).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for unsafe_name, safe_name, call_pattern in _THREADING_LIST:
    # The pattern confirms that this really is a call to the function in
    # question and not merely a name that looks similar.
    if not Search(call_pattern, line):
      continue
    message = ('Consider using ' + safe_name +
               '...) instead of ' + unsafe_name +
               '...) for improved thread safety.')
    error(filename, linenum, 'runtime/threadsafe_fn', 2, message)
2144
2145
def CheckVlogArguments(filename, clean_lines, linenum, error):
  """Flags VLOG() calls that are given a symbolic severity.

  VLOG(2), for instance, is fine, whereas VLOG(INFO), VLOG(ERROR),
  VLOG(WARNING), VLOG(DFATAL) and VLOG(FATAL) are reported.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  symbolic_vlog = Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)',
                         clean_lines.elided[linenum])
  if not symbolic_vlog:
    return

  error(filename, linenum, 'runtime/vlog', 5,
        'VLOG() should be used with numeric verbosity level.  '
        'Use LOG() if you want symbolic severity levels.')
2163
# Recognizes a statement that consists solely of "*name++;" or
# "*name--;".  Such a statement moves the pointer rather than changing
# the value it points to.
_RE_PATTERN_INVALID_INCREMENT = re.compile(r'^\s*\*\w+(\+\+|--);')
2168
2169
def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Flags statements of the form *count++.

  Take this function:
  void increment_counter(int* count) {
    *count++;
  }
  It is invalid because the statement effectively performs count++,
  i.e. it moves the pointer.  It should be written as ++*count,
  (*count)++ or *count += 1 instead.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  if not _RE_PATTERN_INVALID_INCREMENT.match(clean_lines.elided[linenum]):
    return

  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
2190
2191
def IsMacroDefinition(clean_lines, linenum):
  """Tells whether the given line belongs to a macro definition.

  A line counts as part of a macro definition if it starts with #define
  or if the line before it ends in a backslash.

  Args:
    clean_lines: The lines of the file.  They are indexed directly by
      line number here, so this looks like a plain sequence of strings
      rather than a CleansedLines object -- TODO confirm with callers.
    linenum: The number of the line to check.

  Returns:
    True if the line is part of a macro definition, False otherwise.
  """
  if Search(r'^#define', clean_lines[linenum]):
    return True

  # Line 0 has no predecessor that could continue onto it.
  continues_previous_line = (linenum > 0 and
                             Search(r'\\$', clean_lines[linenum - 1]))
  return bool(continues_previous_line)
2200
2201
def IsForwardClassDeclaration(clean_lines, linenum):
  """Checks whether a line is a forward declaration such as 'class Foo;'.

  The line may start with 'template' and may carry arbitrary text in
  front of the 'class' keyword.

  Args:
    clean_lines: The lines of the file, indexed directly by line number.
    linenum: The number of the line to check.

  Returns:
    The match object if the line is a forward class declaration, None
    otherwise.
  """
  candidate = clean_lines[linenum]
  return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', candidate)
2204
2205
class _BlockInfo(object):
  """Bookkeeping record for one generic block of code."""

  def __init__(self, linenum, seen_open_brace):
    # Line on which the block starts.
    self.starting_linenum = linenum
    # Whether the block's opening brace has been encountered yet.
    self.seen_open_brace = seen_open_brace
    self.open_parentheses = 0
    self.inline_asm = _NO_ASM
    # Off for generic blocks; the class and namespace subclasses turn
    # this on.
    self.check_namespace_indentation = False

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Hook for checks on the text that precedes the opening brace.

    Its main use is inspecting what follows a class identifier up to the
    "{", which is usually where the base class is named.  Other kinds of
    blocks have little to check there, so this base version accepts
    everything.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Hook for checks on the text that follows the closing brace.

    Its main use is checking end-of-namespace comments; this base
    version accepts everything.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """

  def IsBlockInfo(self):
    """Tells whether this object is exactly a _BlockInfo.

    Handy for recognizing an instance of _BlockInfo itself as opposed to
    an instance of one of the derived classes.

    Returns:
      True for this class, False for derived classes.
    """
    return type(self) is _BlockInfo
2254
2255
class _ExternCInfo(_BlockInfo):
  """Block record for an 'extern "C"' block."""

  def __init__(self, linenum):
    # These blocks are always created with seen_open_brace set.
    super(_ExternCInfo, self).__init__(linenum, True)
2261
2262
class _ClassInfo(_BlockInfo):
  """Block record for a class or struct."""

  def __init__(self, name, class_or_struct, clean_lines, linenum):
    _BlockInfo.__init__(self, linenum, False)
    self.name = name
    self.is_derived = False
    self.check_namespace_indentation = True

    # Access starts out public in a struct and private otherwise.
    self.is_struct = (class_or_struct == 'struct')
    self.access = 'public' if self.is_struct else 'private'

    # Indentation of the line that opens the class.  The raw line is used
    # rather than the elided one to account for leading comments.
    self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum])

    # Locate the end of the class by balancing braces: it is the first
    # line, from linenum on, after which the running depth is zero.
    # Constructs such as
    #   class A {
    #   } *x = { ...
    # confuse this, but the result is still good enough for
    # CheckSectionSpacing.
    self.last_line = 0
    brace_depth = 0
    for index in range(linenum, clean_lines.NumLines()):
      text = clean_lines.elided[index]
      brace_depth += text.count('{')
      brace_depth -= text.count('}')
      if brace_depth == 0:
        self.last_line = index
        break

  def CheckBegin(self, filename, clean_lines, linenum, error):
    # A ':' that is not part of '::' marks the class as derived.
    has_bare_colon = Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum])
    if has_bare_colon:
      self.is_derived = True

  def CheckEnd(self, filename, clean_lines, linenum, error):
    # A DISALLOW macro, if present, should sit near the end of the class.
    # Walk upward from the line above the closing one and complain if
    # any non-blank line was passed before reaching the macro.
    disallow_pattern = (
        r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' +
        self.name + r'\)')
    passed_other_content = False
    for i in xrange(linenum - 1, self.starting_linenum, -1):
      text = clean_lines.elided[i]
      match = Search(disallow_pattern, text)
      if match:
        if passed_other_content:
          error(filename, i, 'readability/constructors', 3,
                match.group(1) + ' should be the last thing in the class')
        break

      if not Match(r'^\s*$', text):
        passed_other_content = True

    # The closing brace should line up with the start of the class.  This
    # is checked only when nothing but spaces precedes the brace, which
    # leaves single-line class definitions unchecked.
    indent = Match(r'^( *)\}', clean_lines.elided[linenum])
    if indent and len(indent.group(1)) != self.class_indent:
      keyword = 'struct ' if self.is_struct else 'class '
      parent = keyword + self.name
      error(filename, linenum, 'whitespace/indent', 3,
            'Closing brace should be aligned with beginning of %s' % parent)
2330
2331
class _NamespaceInfo(_BlockInfo):
  """Block record for a namespace."""

  def __init__(self, name, linenum):
    _BlockInfo.__init__(self, linenum, False)
    # Anonymous namespaces are stored with an empty name.
    self.name = name or ''
    self.check_namespace_indentation = True

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Check end of namespace comments."""
    line = clean_lines.raw_lines[linenum]

    # A namespace spanning fewer than 10 lines is not required to have a
    # terminating comment.  If it already has one that mentions
    # "namespace", though, that comment is still verified below.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations).  There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    is_short = linenum - self.starting_linenum < 10
    if is_short and not Match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line):
      return

    # Accepted terminators:
    # - "//" comments as well as C style "/* */" comments, so that code
    #   terminating namespaces inside preprocessor macros can be cpplint
    #   clean;
    # - forms like "// end of namespace <name>." with a trailing period.
    # Nothing else is accepted; otherwise an existing comment that is a
    # substring of the expected namespace could yield false negatives.
    if self.name:
      # Named namespace
      expected = (r'^\s*};*\s*(//|/\*).*\bnamespace\s+' +
                  re.escape(self.name) + r'[\*/\.\\\s]*$')
      if not Match(expected, line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace %s"' %
              self.name)
      return

    # Anonymous namespace
    if Match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
      return

    # For "// namespace anonymous" or "// anonymous namespace (more
    # text)", also mention "// anonymous namespace" as an acceptable form.
    if Match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line):
      error(filename, linenum, 'readability/namespace', 5,
            'Anonymous namespace should be terminated with "// namespace"'
            ' or "// anonymous namespace"')
    else:
      error(filename, linenum, 'readability/namespace', 5,
            'Anonymous namespace should be terminated with "// namespace"')
2391
2392
class _PreprocessorInfo(object):
  """Nesting-stack checkpoints for one #if ... #endif group."""

  def __init__(self, stack_before_if):
    # Set once an #else or #elif has been seen for this group.
    self.seen_else = False

    # Complete nesting stack as it was just before the #if.
    self.stack_before_if = stack_before_if

    # Complete nesting stack as it was on reaching the #else; empty
    # until then.
    self.stack_before_else = []
2405
2406
2407class NestingState(object):
2408  """Holds states related to parsing braces."""
2409
2410  def __init__(self):
2411    # Stack for tracking all braces.  An object is pushed whenever we
2412    # see a "{", and popped when we see a "}".  Only 3 types of
2413    # objects are possible:
2414    # - _ClassInfo: a class or struct.
2415    # - _NamespaceInfo: a namespace.
2416    # - _BlockInfo: some other type of block.
2417    self.stack = []
2418
2419    # Top of the previous stack before each Update().
2420    #
2421    # Because the nesting_stack is updated at the end of each line, we
2422    # had to do some convoluted checks to find out what is the current
2423    # scope at the beginning of the line.  This check is simplified by
2424    # saving the previous top of nesting stack.
2425    #
2426    # We could save the full stack, but we only need the top.  Copying
2427    # the full nesting stack would slow down cpplint by ~10%.
2428    self.previous_stack_top = []
2429
2430    # Stack of _PreprocessorInfo objects.
2431    self.pp_stack = []
2432
2433  def SeenOpenBrace(self):
2434    """Check if we have seen the opening brace for the innermost block.
2435
2436    Returns:
2437      True if we have seen the opening brace, False if the innermost
2438      block is still expecting an opening brace.
2439    """
2440    return (not self.stack) or self.stack[-1].seen_open_brace
2441
2442  def InNamespaceBody(self):
2443    """Check if we are currently one level inside a namespace body.
2444
2445    Returns:
2446      True if top of the stack is a namespace block, False otherwise.
2447    """
2448    return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
2449
2450  def InExternC(self):
2451    """Check if we are currently one level inside an 'extern "C"' block.
2452
2453    Returns:
2454      True if top of the stack is an extern block, False otherwise.
2455    """
2456    return self.stack and isinstance(self.stack[-1], _ExternCInfo)
2457
2458  def InClassDeclaration(self):
2459    """Check if we are currently one level inside a class or struct declaration.
2460
2461    Returns:
2462      True if top of the stack is a class/struct, False otherwise.
2463    """
2464    return self.stack and isinstance(self.stack[-1], _ClassInfo)
2465
2466  def InAsmBlock(self):
2467    """Check if we are currently one level inside an inline ASM block.
2468
2469    Returns:
2470      True if the top of the stack is a block containing inline ASM.
2471    """
2472    return self.stack and self.stack[-1].inline_asm != _NO_ASM
2473
2474  def InTemplateArgumentList(self, clean_lines, linenum, pos):
2475    """Check if current position is inside template argument list.
2476
2477    Args:
2478      clean_lines: A CleansedLines instance containing the file.
2479      linenum: The number of the line to check.
2480      pos: position just after the suspected template argument.
2481    Returns:
2482      True if (linenum, pos) is inside template arguments.
2483    """
2484    while linenum < clean_lines.NumLines():
2485      # Find the earliest character that might indicate a template argument
2486      line = clean_lines.elided[linenum]
2487      match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:])
2488      if not match:
2489        linenum += 1
2490        pos = 0
2491        continue
2492      token = match.group(1)
2493      pos += len(match.group(0))
2494
2495      # These things do not look like template argument list:
2496      #   class Suspect {
2497      #   class Suspect x; }
2498      if token in ('{', '}', ';'): return False
2499
2500      # These things look like template argument list:
2501      #   template <class Suspect>
2502      #   template <class Suspect = default_value>
2503      #   template <class Suspect[]>
2504      #   template <class Suspect...>
2505      if token in ('>', '=', '[', ']', '.'): return True
2506
2507      # Check if token is an unmatched '<'.
2508      # If not, move on to the next character.
2509      if token != '<':
2510        pos += 1
2511        if pos >= len(line):
2512          linenum += 1
2513          pos = 0
2514        continue
2515
2516      # We can't be sure if we just find a single '<', and need to
2517      # find the matching '>'.
2518      (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1)
2519      if end_pos < 0:
2520        # Not sure if template argument list or syntax error in file
2521        return False
2522      linenum = end_line
2523      pos = end_pos
2524    return False
2525
2526  def UpdatePreprocessor(self, line):
2527    """Update preprocessor stack.
2528
2529    We need to handle preprocessors due to classes like this:
2530      #ifdef SWIG
2531      struct ResultDetailsPageElementExtensionPoint {
2532      #else
2533      struct ResultDetailsPageElementExtensionPoint : public Extension {
2534      #endif
2535
2536    We make the following assumptions (good enough for most files):
2537    - Preprocessor condition evaluates to true from #if up to first
2538      #else/#elif/#endif.
2539
2540    - Preprocessor condition evaluates to false from #else/#elif up
2541      to #endif.  We still perform lint checks on these lines, but
2542      these do not affect nesting stack.
2543
2544    Args:
2545      line: current line to check.
2546    """
2547    if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
2548      # Beginning of #if block, save the nesting stack here.  The saved
2549      # stack will allow us to restore the parsing state in the #else case.
2550      self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
2551    elif Match(r'^\s*#\s*(else|elif)\b', line):
2552      # Beginning of #else block
2553      if self.pp_stack:
2554        if not self.pp_stack[-1].seen_else:
2555          # This is the first #else or #elif block.  Remember the
2556          # whole nesting stack up to this point.  This is what we
2557          # keep after the #endif.
2558          self.pp_stack[-1].seen_else = True
2559          self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
2560
2561        # Restore the stack to how it was before the #if
2562        self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
2563      else:
2564        # TODO(unknown): unexpected #else, issue warning?
2565        pass
2566    elif Match(r'^\s*#\s*endif\b', line):
2567      # End of #if or #else blocks.
2568      if self.pp_stack:
2569        # If we saw an #else, we will need to restore the nesting
2570        # stack to its former state before the #else, otherwise we
2571        # will just continue from where we left off.
2572        if self.pp_stack[-1].seen_else:
2573          # Here we can just use a shallow copy since we are the last
2574          # reference to it.
2575          self.stack = self.pp_stack[-1].stack_before_else
2576        # Drop the corresponding #if
2577        self.pp_stack.pop()
2578      else:
2579        # TODO(unknown): unexpected #endif, issue warning?
2580        pass
2581
2582  # TODO(unknown): Update() is too long, but we will refactor later.
  def Update(self, filename, clean_lines, linenum, error):
    """Update nesting state with current line.

    The elided form of the line is processed in the following order:
      1. Remember the current top of the stack in previous_stack_top.
      2. Feed the line to UpdatePreprocessor().
      3. Adjust the open parenthesis count of the innermost block and track
         whether an inline assembly block is being entered or left.
      4. Push a _NamespaceInfo for every namespace declaration at the
         start of the line.
      5. Push a _ClassInfo if the remainder starts a class/struct
         declaration that is not part of a template argument list.
      6. Call CheckBegin() on the innermost block while its opening brace
         has not been seen yet.
      7. Inside a class, record the current access specifier and check
         how it is indented.
      8. Walk the remaining '{', ';', ')' and '}' tokens, pushing and
         popping blocks as needed and calling CheckEnd() on each '}'.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[linenum]

    # Remember top of the previous nesting stack.
    #
    # The stack is always pushed/popped and not modified in place, so
    # we can just do a shallow copy instead of copy.deepcopy.  Using
    # deepcopy would slow down cpplint by ~28%.
    if self.stack:
      self.previous_stack_top = self.stack[-1]
    else:
      self.previous_stack_top = None

    # Update pp_stack
    self.UpdatePreprocessor(line)

    # Count parentheses.  This is to avoid adding struct arguments to
    # the nesting stack.
    if self.stack:
      inner_block = self.stack[-1]
      depth_change = line.count('(') - line.count(')')
      inner_block.open_parentheses += depth_change

      # Also check if we are starting or ending an inline assembly block.
      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
        if (depth_change != 0 and
            inner_block.open_parentheses == 1 and
            _MATCH_ASM.match(line)):
          # Enter assembly block
          inner_block.inline_asm = _INSIDE_ASM
        else:
          # Not entering assembly block.  If previous line was _END_ASM,
          # we will now shift to _NO_ASM state.
          inner_block.inline_asm = _NO_ASM
      elif (inner_block.inline_asm == _INSIDE_ASM and
            inner_block.open_parentheses == 0):
        # Exit assembly block
        inner_block.inline_asm = _END_ASM

    # Consume namespace declaration at the beginning of the line.  Do
    # this in a loop so that we catch same line declarations like this:
    #   namespace proto2 { namespace bridge { class MessageSet; } }
    while True:
      # Match start of namespace.  The "\b\s*" below catches namespace
      # declarations even if the keyword is not followed by whitespace,
      # so that we don't confuse our namespace checker.  The missing
      # spaces will be flagged by CheckSpacing.
      #
      # Group 1 is the (optional) namespace name, group 2 is the rest of
      # the line.
      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
      if not namespace_decl_match:
        break

      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
      self.stack.append(new_namespace)

      # Continue with whatever follows the declaration.  If the opening
      # brace is on this line, consume it here as well.
      line = namespace_decl_match.group(2)
      if line.find('{') != -1:
        new_namespace.seen_open_brace = True
        line = line[line.find('{') + 1:]

    # Look for a class declaration in whatever is left of the line
    # after parsing namespaces.  The regexp accounts for decorated classes
    # such as in:
    #   class LOCKABLE API Object {
    #   };
    #
    # Group 1 spans everything up to and including the class name, group 2
    # is the 'class' or 'struct' keyword, group 3 is the (possibly
    # qualified) class name and group 4 is the rest of the line.
    class_decl_match = Match(
        r'^(\s*(?:template\s*<[\w\s<>,:]*>\s*)?'
        r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))'
        r'(.*)$', line)
    if (class_decl_match and
        (not self.stack or self.stack[-1].open_parentheses == 0)):
      # We do not want to accept classes that are actually template arguments:
      #   template <class Ignore1,
      #             class Ignore2 = Default<Args>,
      #             template <Args> class Ignore3>
      #   void Function() {};
      #
      # To avoid template argument cases, we scan forward and look for
      # an unmatched '>'.  If we see one, assume we are inside a
      # template argument list.
      end_declaration = len(class_decl_match.group(1))
      if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration):
        self.stack.append(_ClassInfo(
            class_decl_match.group(3), class_decl_match.group(2),
            clean_lines, linenum))
        line = class_decl_match.group(4)

    # If we have not yet seen the opening brace for the innermost block,
    # run checks here.
    # NOTE(review): self.stack[-1] is only safe if SeenOpenBrace() reports
    # true for an empty stack -- confirm against its definition.
    if not self.SeenOpenBrace():
      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)

    # Update access control if we are inside a class/struct
    if self.stack and isinstance(self.stack[-1], _ClassInfo):
      classinfo = self.stack[-1]
      # Group 1 is the text before the keyword, group 2 is the access
      # keyword and group 3 is the optional whitespace (and "slots")
      # between the keyword and the colon.
      access_match = Match(
          r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
          r':(?:[^:]|$)',
          line)
      if access_match:
        classinfo.access = access_match.group(2)

        # Check that access keywords are indented +1 space.  Skip this
        # check if the keywords are not preceded by whitespaces.
        indent = access_match.group(1)
        if (len(indent) != classinfo.class_indent + 1 and
            Match(r'^\s*$', indent)):
          if classinfo.is_struct:
            parent = 'struct ' + classinfo.name
          else:
            parent = 'class ' + classinfo.name
          slots = ''
          if access_match.group(3):
            slots = access_match.group(3)
          error(filename, linenum, 'whitespace/indent', 3,
                '%s%s: should be indented +1 space inside %s' % (
                    access_match.group(2), slots, parent))

    # Consume braces or semicolons from what's left of the line
    while True:
      # Match first brace, semicolon, or closed parenthesis.
      # Group 1 is that token, group 2 is everything after it.
      matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
      if not matched:
        break

      token = matched.group(1)
      if token == '{':
        # If namespace or class hasn't seen an opening brace yet, mark
        # namespace/class head as complete.  Push a new block onto the
        # stack otherwise.
        if not self.SeenOpenBrace():
          self.stack[-1].seen_open_brace = True
        elif Match(r'^extern\s*"[^"]*"\s*\{', line):
          self.stack.append(_ExternCInfo(linenum))
        else:
          self.stack.append(_BlockInfo(linenum, True))
          if _MATCH_ASM.match(line):
            self.stack[-1].inline_asm = _BLOCK_ASM

      elif token == ';' or token == ')':
        # If we haven't seen an opening brace yet, but we already saw
        # a semicolon, this is probably a forward declaration.  Pop
        # the stack for these.
        #
        # Similarly, if we haven't seen an opening brace yet, but we
        # already saw a closing parenthesis, then these are probably
        # function arguments with extra "class" or "struct" keywords.
        # Also pop the stack for these.
        if not self.SeenOpenBrace():
          self.stack.pop()
      else:  # token == '}'
        # Perform end of block checks and pop the stack.
        if self.stack:
          self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
          self.stack.pop()
      # Keep scanning the text that follows the consumed token.
      line = matched.group(2)
2746
2747  def InnermostClass(self):
2748    """Get class info on the top of the stack.
2749
2750    Returns:
2751      A _ClassInfo object if we are inside a class, or None otherwise.
2752    """
2753    for i in range(len(self.stack), 0, -1):
2754      classinfo = self.stack[i - 1]
2755      if isinstance(classinfo, _ClassInfo):
2756        return classinfo
2757    return None
2758
2759  def CheckCompletedBlocks(self, filename, error):
2760    """Checks that all classes and namespaces have been completely parsed.
2761
2762    Call this when all lines in a file have been processed.
2763    Args:
2764      filename: The name of the current file.
2765      error: The function to call with any errors found.
2766    """
2767    # Note: This test can result in false positives if #ifdef constructs
2768    # get in the way of brace matching. See the testBuildClass test in
2769    # cpplint_unittest.py for an example of this.
2770    for obj in self.stack:
2771      if isinstance(obj, _ClassInfo):
2772        error(filename, obj.starting_linenum, 'build/class', 5,
2773              'Failed to find complete declaration of class %s' %
2774              obj.name)
2775      elif isinstance(obj, _NamespaceInfo):
2776        error(filename, obj.starting_linenum, 'build/namespaces', 5,
2777              'Failed to find complete declaration of namespace %s' %
2778              obj.name)
2779
2780
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  nesting_state, error):
  r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  Complain about several constructs which gcc-2 accepts, but which are
  not standard C++.  Warning about these in lint is one way to ease the
  transition to new compilers.
  - put storage class first (e.g. "static const" instead of "const static").
  - "%lld" instead of "%qd" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.

  Additionally, check for constructor/destructor style violations and reference
  members, as it is very convenient to do so while checking for
  gcc-2 compliance.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, error level, and message
  """

  # Remove comments from the line, but leave in strings for now.
  line = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated.  Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', line):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional.  Try rewriting to avoid them.')

  # Remove escaped backslashes before looking for undefined escapes.
  line = line.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', line):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes.  Unescape them.')

  # For the rest, work with both comments and strings removed.
  line = clean_lines.elided[linenum]

  if Search(r'\b(const|volatile|void|char|short|int|long'
            r'|float|double|signed|unsigned'
            r'|schar|u?int8|u?int16|u?int32|u?int64)'
            r'\s+(register|static|extern|typedef)\b',
            line):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage-class specifier (static, extern, typedef, etc) should be '
          'at the beginning of the declaration.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard.  Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid.  Remove this line.')

  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
            line):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
    # TODO(unknown): Could it be expanded safely to arbitrary references,
    # without triggering too many false positives? The first
    # attempt triggered 5 warnings for mostly benign code in the regtest, hence
    # the restriction.
    # Here's the original regexp, for the reference:
    # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
    # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
    error(filename, linenum, 'runtime/member_string_references', 2,
          'const string& members are dangerous. It is much better to use '
          'alternatives, such as pointers or simple constants.')

  # Everything else in this function operates on class declarations.
  # Return early if the top of the nesting stack is not a class, or if
  # the class head is not completed yet.
  classinfo = nesting_state.InnermostClass()
  if not classinfo or not classinfo.seen_open_brace:
    return

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = classinfo.name.split('::')[-1]

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.
  #
  # Group 1 is the optional 'explicit' keyword, group 2 is the text between
  # the constructor's parentheses (one level of nested parentheses allowed).
  explicit_constructor_match = Match(
      r'\s+(?:(?:inline|constexpr)\s+)*(explicit\s+)?'
      r'(?:(?:inline|constexpr)\s+)*%s\s*'
      r'\(((?:[^()]|\([^()]*\))*)\)'
      % re.escape(base_classname),
      line)

  if explicit_constructor_match:
    is_marked_explicit = explicit_constructor_match.group(1)

    if not explicit_constructor_match.group(2):
      constructor_args = []
    else:
      constructor_args = explicit_constructor_match.group(2).split(',')

    # collapse arguments so that commas in template parameter lists and function
    # argument parameter lists don't split arguments in two
    i = 0
    while i < len(constructor_args):
      constructor_arg = constructor_args[i]
      # Stop merging once there are no more pieces left.  A '<' that is
      # not a template bracket (e.g. "int x = 1 << 2" or "bool b = a < c")
      # never gets balanced, and indexing past the end of the list would
      # raise IndexError.
      while ((constructor_arg.count('<') > constructor_arg.count('>') or
              constructor_arg.count('(') > constructor_arg.count(')')) and
             i + 1 < len(constructor_args)):
        constructor_arg += ',' + constructor_args[i + 1]
        del constructor_args[i + 1]
      constructor_args[i] = constructor_arg
      i += 1

    defaulted_args = [arg for arg in constructor_args if '=' in arg]
    noarg_constructor = (not constructor_args or  # empty arg list
                         # 'void' arg specifier
                         (len(constructor_args) == 1 and
                          constructor_args[0].strip() == 'void'))
    onearg_constructor = ((len(constructor_args) == 1 and  # exactly one arg
                           not noarg_constructor) or
                          # all but at most one arg defaulted
                          (len(constructor_args) >= 1 and
                           not noarg_constructor and
                           len(defaulted_args) >= len(constructor_args) - 1))
    initializer_list_constructor = bool(
        onearg_constructor and
        Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0]))
    copy_constructor = bool(
        onearg_constructor and
        Match(r'(const\s+)?%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&'
              % re.escape(base_classname), constructor_args[0].strip()))

    if (not is_marked_explicit and
        onearg_constructor and
        not initializer_list_constructor and
        not copy_constructor):
      if defaulted_args:
        error(filename, linenum, 'runtime/explicit', 5,
              'Constructors callable with one argument '
              'should be marked explicit.')
      else:
        error(filename, linenum, 'runtime/explicit', 5,
              'Single-parameter constructors should be marked explicit.')
    elif is_marked_explicit and not onearg_constructor:
      if noarg_constructor:
        error(filename, linenum, 'runtime/explicit', 5,
              'Zero-parameter constructors should not be marked explicit.')
2939
2940
def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error):
  """Flags stray whitespace around the parentheses of function calls.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # if/for/while/switch expressions have their own, more liberal spacing
  # conventions, but function calls often occur inside them and those
  # calls are held to the stricter standard.  So when the line is such a
  # statement, only the text between its parentheses is examined;
  # otherwise the whole line is.
  control_patterns = (r'\bif\s*\((.*)\)\s*{',
                      r'\bfor\s*\((.*)\)\s*{',
                      r'\bwhile\s*\((.*)\)\s*[{;]',
                      r'\bswitch\s*\((.*)\)\s*{')
  fncall = line
  for control_pattern in control_patterns:
    control_match = Search(control_pattern, line)
    if control_match:
      fncall = control_match.group(1)
      break

  # Except in if/for/while/switch, there should never be space
  # immediately inside parens (eg "f( 3, 4 )"), with an exception for
  # nested parens ( (a+b) + c ).  Likewise there should never be a space
  # before a ( when it's a function argument, which is assumed when the
  # char before the whitespace is legal in a function name (alnum + _)
  # and we're not starting a macro.  Pointers and references to arrays
  # and functions are too tricky and get skipped; they are recognized in
  # a very simple way:
  #   " (something)(maybe-something)" or
  #   " (something)(maybe-something," or
  #   " (something)[something]"
  # The contents of [] are assumed to be short enough to never wrap.

  # Ignore control structures.
  if Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
            fncall):
    return
  # Ignore pointers/references to functions.
  if Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall):
    return
  # Ignore pointers/references to arrays.
  if Search(r' \([^)]+\)\[[^\]]+\]', fncall):
    return

  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  # Constructs in which whitespace before the ( is not complained about.
  space_before_paren_exemptions = (
      r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(',
      r'#\s*define|typedef|using\s+\w+\s*=',
      r'\w\s+\((\w+::)*\*\w+\)\(',
      r'\bcase\s+\(')
  if (Search(r'\w\s+\(', fncall) and
      not any(Search(exemption, fncall)
              for exemption in space_before_paren_exemptions)):
    # TODO(unknown): Space after an operator function seem to be a common
    # error, silence those for now by restricting them to highest verbosity.
    level = 0 if Search(r'\boperator_*\b', line) else 4
    error(filename, linenum, 'whitespace/parens', level,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain
  if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    # If the closing parenthesis is preceded by only whitespaces,
    # try to give a more descriptive error message.
    if Search(r'^\s+\)', fncall):
      message = 'Closing ) should be moved to the previous line'
    else:
      message = 'Extra space before )'
    error(filename, linenum, 'whitespace/parens', 2, message)
3016
3017
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is empty or is made up solely of
  whitespace characters.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
3031
3032
def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
                                 error):
  """Runs the namespace indentation check on a line when it applies.

  The line is treated as a "namespace indent item" when the nesting stack
  holds more than one entry, the top entry asks for namespace indentation
  checking, and the previous top of the stack is a _NamespaceInfo equal to
  the entry right below the current top.  That flag is handed to
  ShouldCheckNamespaceIndentation(), which decides whether
  CheckItemIndentationInNamespace() gets called.

  Args:
    filename: The name of the current file.
    nesting_state: The nesting state object; its stack and
                   previous_stack_top attributes are read here.
    clean_lines: A CleansedLines instance containing the file.
    line: Forwarded unchanged to the two helpers together with
          clean_lines.elided (presumably the number of the line to
          check -- TODO confirm against the helpers).
    error: The function to call with any errors found.
  """
  stack = nesting_state.stack
  is_namespace_indent_item = (
      len(stack) > 1 and
      stack[-1].check_namespace_indentation and
      isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and
      nesting_state.previous_stack_top == stack[-2])

  elided_lines = clean_lines.elided
  if not ShouldCheckNamespaceIndentation(nesting_state,
                                         is_namespace_indent_item,
                                         elided_lines, line):
    return
  CheckItemIndentationInNamespace(filename, elided_lines, line, error)
3045
3046
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  https://google.github.io/styleguide/cppguide.html#Write_Short_Functions

  The algorithm is simplistic and assumes that the other style guidelines
  (especially spacing) are followed:
  - Only unindented functions are checked, so class members are unchecked.
  - Trivial bodies are unchecked, so constructors with huge initializer
    lists may be missed.
  - Blank/comment lines are not counted so as to avoid encouraging the
    removal of vertical space and comments just to get through a lint check.
  - NOLINT *on the last line of a function* disables this check.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]

  # decls * & space::name( ...
  header_match = Match(r'(\w(\w|::|\*|\&|\s)*)\(', line)
  starting_func = False
  if header_match:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = header_match.group(1).split()[-1]
    starting_func = (function_name in ('TEST', 'TEST_F') or
                     not Match(r'[A-Z_]+$', function_name))

  if not starting_func:
    if Match(r'^\}\s*$', line):  # function end
      function_state.Check(error, filename, linenum)
      function_state.End()
    elif not Match(r'^\s*$', line):
      function_state.Count()  # Count non-blank/non-comment lines.
    return

  # Scan forward from the header for the first line that settles whether
  # a body follows, joining the header lines along the way.
  joined_line = ''
  for start_linenum in xrange(linenum, clean_lines.NumLines()):
    start_line = lines[start_linenum]
    joined_line += ' ' + start_line.lstrip()
    if Search(r'(;|})', start_line):  # Declarations and trivial functions
      break                              # ... ignore
    if Search(r'{', start_line):
      function = Search(r'((\w|:)*)\(', line).group(1)
      if not Match(r'TEST', function):
        function += '()'
      else:                              # Handle TEST... macros
        parameter_regexp = Search(r'(\(.*\))', joined_line)
        if parameter_regexp:             # Ignore bad syntax
          function += parameter_regexp.group(1)
      function_state.Begin(function)
      break
  else:
    # No body for the function (or evidence of a non-function) was found.
    error(filename, linenum, 'readability/fn_size', 5,
          'Lint failed to find start of function body.')
3113
3114
# Matches the start of a '//' comment that begins with TODO.  Groups:
#   1: the whitespace between '//' and 'TODO' (may be empty),
#   2: the optional parenthesized username, e.g. '(my_username)',
#   3: the optional single whitespace character (or end of text) that
#      follows the optional ':'.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
3116
3117
def CheckComment(line, filename, linenum, next_line_start, error):
  """Looks for frequent formatting mistakes in a // comment.

  Three things are checked: the spacing between code and the comment, the
  shape of TODO comments, and the space between // and the comment text.

  Args:
    line: The line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    next_line_start: The first non-whitespace column of the next line.
    error: The function to call with any errors found.
  """
  commentpos = line.find('//')
  if commentpos == -1:
    return

  # Check if the // may be in quotes.  If so, ignore it: an odd number of
  # double quotes ahead of it (escape sequences aside) means we are
  # inside a string.
  code_before = re.sub(r'\\.', '', line[0:commentpos])
  if code_before.count('"') % 2 != 0:
    return

  # Allow one space for new scopes, two spaces otherwise:
  opens_scope = (Match(r'^.*{ *//', line) and
                 next_line_start == commentpos)
  if not opens_scope:
    too_close = False
    for offset in (1, 2):
      if (commentpos >= offset and
          line[commentpos - offset] not in string.whitespace):
        too_close = True
        break
    if too_close:
      error(filename, linenum, 'whitespace/comments', 2,
            'At least two spaces is best between code and comments')

  # Checks for common mistakes in TODO comments.
  comment = line[commentpos:]
  todo_match = _RE_PATTERN_TODO.match(comment)
  if todo_match:
    leading_whitespace, username, middle_whitespace = todo_match.groups()

    # One whitespace is correct; zero whitespace is handled elsewhere.
    if len(leading_whitespace) > 1:
      error(filename, linenum, 'whitespace/todo', 2,
            'Too many spaces before TODO')

    if not username:
      error(filename, linenum, 'readability/todo', 2,
            'Missing username in TODO; it should look like '
            '"// TODO(my_username): Stuff."')

    # Compare against the two accepted values explicitly: None (nothing
    # matched after the TODO) has to be reported as well.
    if middle_whitespace not in (' ', ''):
      error(filename, linenum, 'whitespace/todo', 2,
            'TODO(my_username) should be followed by a space')

  # If the comment contains an alphanumeric character, there
  # should be a space somewhere between it and the // unless
  # it's a /// or //! Doxygen comment.
  if (Match(r'//[^ ]*\w', comment) and
      not Match(r'(///|//\!)(\s+|$)', comment)):
    error(filename, linenum, 'whitespace/comments', 4,
          'Should have a space between // and comment')
3170
3171
3172def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
3173  """Checks for the correctness of various spacing issues in the code.
3174
3175  Things we check for: spaces around operators, spaces after
3176  if/for/while/switch, no spaces around parens in function calls, two
3177  spaces between code and comment, don't start a block with a blank
3178  line, don't end a function with a blank line, don't add a blank line
3179  after public/protected/private, don't have too many blank lines in a row.
3180
3181  Args:
3182    filename: The name of the current file.
3183    clean_lines: A CleansedLines instance containing the file.
3184    linenum: The number of the line to check.
3185    nesting_state: A NestingState instance which maintains information about
3186                   the current stack of nested blocks being parsed.
3187    error: The function to call with any errors found.
3188  """
3189
3190  # Don't use "elided" lines here, otherwise we can't check commented lines.
3191  # Don't want to use "raw" either, because we don't want to check inside C++11
3192  # raw strings,
3193  raw = clean_lines.lines_without_raw_strings
3194  line = raw[linenum]
3195
3196  # Before nixing comments, check if the line is blank for no good
3197  # reason.  This includes the first line after a block is opened, and
3198  # blank lines at the end of a function (ie, right before a line like '}'
3199  #
3200  # Skip all the blank line checks if we are immediately inside a
3201  # namespace body.  In other words, don't issue blank line warnings
3202  # for this block:
3203  #   namespace {
3204  #
3205  #   }
3206  #
3207  # A warning about missing end of namespace comments will be issued instead.
3208  #
3209  # Also skip blank line checks for 'extern "C"' blocks, which are formatted
3210  # like namespaces.
3211  if (IsBlankLine(line) and
3212      not nesting_state.InNamespaceBody() and
3213      not nesting_state.InExternC()):
3214    elided = clean_lines.elided
3215    prev_line = elided[linenum - 1]
3216    prevbrace = prev_line.rfind('{')
3217    # TODO(unknown): Don't complain if line before blank line, and line after,
3218    #                both start with alnums and are indented the same amount.
3219    #                This ignores whitespace at the start of a namespace block
3220    #                because those are not usually indented.
3221    if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
3222      # OK, we have a blank line at the start of a code block.  Before we
3223      # complain, we check if it is an exception to the rule: The previous
3224      # non-empty line has the parameters of a function header that are indented
3225      # 4 spaces (because they did not fit in a 80 column line when placed on
3226      # the same line as the function name).  We also check for the case where
3227      # the previous line is indented 6 spaces, which may happen when the
3228      # initializers of a constructor do not fit into a 80 column line.
3229      exception = False
3230      if Match(r' {6}\w', prev_line):  # Initializer list?
3231        # We are looking for the opening column of initializer list, which
3232        # should be indented 4 spaces to cause 6 space indentation afterwards.
3233        search_position = linenum-2
3234        while (search_position >= 0
3235               and Match(r' {6}\w', elided[search_position])):
3236          search_position -= 1
3237        exception = (search_position >= 0
3238                     and elided[search_position][:5] == '    :')
3239      else:
3240        # Search for the function arguments or an initializer list.  We use a
3241        # simple heuristic here: If the line is indented 4 spaces; and we have a
3242        # closing paren, without the opening paren, followed by an opening brace
3243        # or colon (for initializer lists) we assume that it is the last line of
3244        # a function header.  If we have a colon indented 4 spaces, it is an
3245        # initializer list.
3246        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
3247                           prev_line)
3248                     or Match(r' {4}:', prev_line))
3249
3250      if not exception:
3251        error(filename, linenum, 'whitespace/blank_line', 2,
3252              'Redundant blank line at the start of a code block '
3253              'should be deleted.')
3254    # Ignore blank lines at the end of a block in a long if-else
3255    # chain, like this:
3256    #   if (condition1) {
3257    #     // Something followed by a blank line
3258    #
3259    #   } else if (condition2) {
3260    #     // Something else
3261    #   }
3262    if linenum + 1 < clean_lines.NumLines():
3263      next_line = raw[linenum + 1]
3264      if (next_line
3265          and Match(r'\s*}', next_line)
3266          and next_line.find('} else ') == -1):
3267        error(filename, linenum, 'whitespace/blank_line', 3,
3268              'Redundant blank line at the end of a code block '
3269              'should be deleted.')
3270
3271    matched = Match(r'\s*(public|protected|private):', prev_line)
3272    if matched:
3273      error(filename, linenum, 'whitespace/blank_line', 3,
3274            'Do not leave a blank line after "%s:"' % matched.group(1))
3275
3276  # Next, check comments
3277  next_line_start = 0
3278  if linenum + 1 < clean_lines.NumLines():
3279    next_line = raw[linenum + 1]
3280    next_line_start = len(next_line) - len(next_line.lstrip())
3281  CheckComment(line, filename, linenum, next_line_start, error)
3282
3283  # get rid of comments and strings
3284  line = clean_lines.elided[linenum]
3285
3286  # You shouldn't have spaces before your brackets, except maybe after
3287  # 'delete []', 'return []() {};', or 'auto [abc, ...] = ...;'.
3288  if Search(r'\w\s+\[', line) and not Search(r'(?:auto&?|delete|return)\s+\[', line):
3289    error(filename, linenum, 'whitespace/braces', 5,
3290          'Extra space before [')
3291
3292  # In range-based for, we wanted spaces before and after the colon, but
3293  # not around "::" tokens that might appear.
3294  if (Search(r'for *\(.*[^:]:[^: ]', line) or
3295      Search(r'for *\(.*[^: ]:[^:]', line)):
3296    error(filename, linenum, 'whitespace/forcolon', 2,
3297          'Missing space around colon in range-based for loop')
3298
3299
def CheckOperatorSpacing(filename, clean_lines, linenum, error):
  """Reports missing or superfluous whitespace around operators.

  Covers '=', the comparison operators, '||', '<', '>', '<<', '>>' and the
  unary operators '!', '~', '--' and '++'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  category = 'whitespace/operators'
  line = clean_lines.elided[linenum]

  # Operator method names (e.g. "operator==(") would trip the checks below.
  # Mask the symbol that follows "operator" with underscores of equal length
  # so that every other character keeps its column.  Keep masking until the
  # pattern stops matching, since an operator may call another operator on
  # the same line.
  operator_pattern = r'^(.*\boperator\b)(\S+)(\s*\(.*)$'
  masked = Match(operator_pattern, line)
  while masked:
    head, symbol, tail = masked.groups()
    line = head + '_' * len(symbol) + tail
    masked = Match(operator_pattern, line)

  # '=' with a word character or '.' glued to either side is reported,
  # unless the line holds an if/while/for (where "(a=Foo()) == 0" style
  # assignments are tolerated), a compound or comparison operator from
  # [lex.operators] in the C++11 standard, or "operator=".
  tight_assignment = Search(r'[\w.]=', line) or Search(r'=[\w.]', line)
  if tight_assignment:
    in_control_statement = Search(r'\b(if|while|for) ', line)
    has_other_equals_operator = Search(
        r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line)
    is_operator_assign = Search(r'operator=', line)
    if not (in_control_statement or has_other_equals_operator or
            is_operator_assign):
      error(filename, linenum, category, 4,
            'Missing spaces around =')

  # Spacing around + - * / is deliberately not checked: it is too hard to
  # tell when there is too little whitespace.

  # Binary comparison operators and '||' need whitespace on both sides.
  # '<=' and '>=' are tested before the bare '<' and '>' so that they are not
  # mistaken for them.  An operator followed by a comma is assumed to be a
  # macro argument and left alone.  '&&' is excluded because rvalue
  # references produce too many false positives.
  comparison = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line)
  if comparison:
    error(filename, linenum, category, 3,
          'Missing spaces around %s' % comparison.group(1))
  elif not Match(r'#.*include', line):
    # '<' with no whitespace on either side.  Requiring both sides to be
    # tight avoids some false positives with shifts.  A '<' for which
    # CloseExpression reports an end position is skipped.
    less_than = Match(r'^(.*[^\s<])<[^\s=<,]', line)
    if less_than:
      _, _, end_pos = CloseExpression(
          clean_lines, linenum, len(less_than.group(1)))
      if end_pos <= -1:
        error(filename, linenum, category, 3,
              'Missing spaces around <')

    # Same idea for '>', searching backwards for its opening counterpart.
    greater_than = Match(r'^(.*[^-\s>])>[^\s=>,]', line)
    if greater_than:
      _, _, start_pos = ReverseCloseExpression(
          clean_lines, linenum, len(greater_than.group(1)))
      if start_pos <= -1:
        error(filename, linenum, category, 3,
              'Missing spaces around >')

  # '<<' without spaces is accepted between digits (10<<20) and in
  # "operator<<;", but not otherwise (notably not for streams).  Operators
  # right after an opening parenthesis are ignored as well, since those tend
  # to be macros that deal with operators.
  shift = Search(r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])', line)
  if shift:
    left, right = shift.group(1), shift.group(2)
    numeric_shift = left.isdigit() and right.isdigit()
    operator_declaration = left == 'operator' and right == ';'
    if not numeric_shift and not operator_declaration:
      error(filename, linenum, category, 3,
            'Missing spaces around <<')

  # '>>' may close nested templates in C++11, so it is almost always allowed
  # without a trailing space.  A letter or underscore directly after it is
  # still reported: for a right shift (value >>alpha) spaces are wanted, and
  # after a template close (type<type<type>> alpha) the identifier must be
  # separated from the type anyway.
  if Search(r'>>[a-zA-Z_]', line):
    error(filename, linenum, category, 3,
          'Missing spaces around >>')

  # Unary operators must not be separated from their operand.
  unary = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if unary:
    error(filename, linenum, category, 4,
          'Extra space for operator %s' % unary.group(1))
3413
3414
def CheckParenthesisSpacing(filename, clean_lines, linenum, error):
  """Reports spacing problems around the parentheses of control statements.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  category = 'whitespace/parens'
  line = clean_lines.elided[linenum]

  # if, for, while and switch must be separated from their opening paren.
  unspaced = Search(r' (if\(|for\(|while\(|switch\()', line)
  if unspaced:
    error(filename, linenum, category, 5,
          'Missing space before ( in %s' % unspaced.group(1))

  # For a control statement ending in "{", capture the padding right after
  # "(" and right before ")", plus the first character inside the parens.
  control = Search(r'\b(if|for|while|switch)\s*'
                   r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                   line)
  if not control:
    return
  keyword, left_pad, first_char, right_pad = control.groups()

  # Both sides should carry the same amount of padding.  We don't want
  # "if ( foo)".  Exceptions: "for ( ; foo; bar)" (one extra space on the
  # left in front of an empty initializer) and "for (foo; bar; )".
  if len(left_pad) != len(right_pad):
    empty_initializer = (first_char == ';' and
                         len(left_pad) == 1 + len(right_pad))
    empty_increment = (not left_pad and
                       Search(r'\bfor\s*\(.*; \)', line))
    if not (empty_initializer or empty_increment):
      error(filename, linenum, category, 5,
            'Mismatching spaces inside () in %s' % keyword)

  # And that amount should be zero or one; we don't want "if (   foo   )".
  if len(left_pad) not in [0, 1]:
    error(filename, linenum, category, 5,
          'Should have zero or one spaces inside ( and ) in %s' %
          keyword)
3451
3452
def CheckCommaSpacing(filename, clean_lines, linenum, error):
  """Reports missing whitespace after commas and semicolons.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  unelided_lines = clean_lines.lines_without_raw_strings
  line = clean_lines.elided[linenum]

  # A comma (function argument separator or operator) must be followed by
  # whitespace.  A comma followed by another comma is fine, since that only
  # happens for empty macro arguments.  "operator,(" is rewritten to "F("
  # first so that the comma operator's own name is not reported.
  #
  # Two passes: the elided line tells us whether whitespace is missing, and
  # the line with comments still in place confirms that the gap was not
  # merely created by eliding a comment.
  tight_comma = r',[^,\s]'
  without_comma_operator = ReplaceAll(r'\boperator\s*,\s*\(', 'F(', line)
  if Search(tight_comma, without_comma_operator):
    if Search(tight_comma, unelided_lines[linenum]):
      error(filename, linenum, 'whitespace/comma', 3,
            'Missing space after ,')

  # A semicolon must be followed by whitespace, except when the next
  # character is '}', ';', a backslash, ')' or '/'.
  # TODO(unknown): clarify whether 'if (1) { return 1;}' requires one more
  # space after ;
  if Search(r';[^\s};\\)/]', line):
    error(filename, linenum, 'whitespace/semicolon', 3,
          'Missing space after ;')
3487
3488
def _IsType(clean_lines, nesting_state, expr):
  """Check if expression looks like a type name, returns true if so.

  Only the last token of expr is considered.  It is first tested against
  _TYPES (native types and stdint types).  If that fails, the enclosing
  blocks are walked from innermost to outermost, and the lines between a
  "template" keyword and the block's opening line are searched for a
  typename/class/struct declaration of the token.  The walk stops with a
  negative answer as soon as a namespace block is reached.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    expr: The expression to check.
  Returns:
    True, if token looks like a type.
  """
  # Keep only the last token in the expression
  last_word = Match(r'^.*(\b\S+)$', expr)
  if last_word:
    token = last_word.group(1)
  else:
    token = expr

  # Match native types and stdint types
  if _TYPES.match(token):
    return True

  # Try a bit harder to match templated types.  Walk up the nesting
  # stack until we find something that resembles a typename
  # declaration for what we are looking for.
  typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) +
                      r'\b')
  block_index = len(nesting_state.stack) - 1
  while block_index >= 0:
    if isinstance(nesting_state.stack[block_index], _NamespaceInfo):
      return False

    # Found where the opening brace is.  We want to scan from this
    # line up to the beginning of the function, minus a few lines.
    #   template <typename Type1,  // stop scanning here
    #             ...>
    #   class C
    #     : public ... {  // start scanning here
    last_line = nesting_state.stack[block_index].starting_linenum

    # Never scan past the line on which the enclosing block starts (or
    # past the top of the file for the outermost block).
    next_block_start = 0
    if block_index > 0:
      next_block_start = nesting_state.stack[block_index - 1].starting_linenum
    first_line = last_line
    while first_line >= next_block_start:
      if clean_lines.elided[first_line].find('template') >= 0:
        break
      first_line -= 1
    if first_line < next_block_start:
      # Didn't find any "template" keyword before reaching the next block,
      # there are probably no template things to check for this block
      block_index -= 1
      continue

    # Look for typename in the specified range.  The builtin range() is used
    # rather than the Python-2-only xrange(); the span is only a few lines.
    for i in range(first_line, last_line + 1):
      if Search(typename_pattern, clean_lines.elided[i]):
        return True
    block_index -= 1

  return False
3550
3551
def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error):
  """Checks for horizontal spacing near braces and trailing semicolons.

  Reports a missing space before an opening brace (unless it looks like
  brace initialization), a missing space in "}else", and questionable
  semicolons at the end of the line (empty statements, a line holding only
  a semicolon, whitespace before the final semicolon).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Except after an opening paren, or after another opening brace (in case of
  # an initializer list, for instance), you should have spaces before your
  # braces when they are delimiting blocks, classes, namespaces etc.
  # And since you should never have braces at the beginning of a line,
  # this is an easy test.  Except that braces used for initialization don't
  # follow the same rule; we often don't want spaces before those.
  match = Match(r'^(.*[^ ({>]){', line)

  if match:
    # Try a bit harder to check for brace initialization.  This
    # happens in one of the following forms:
    #   Constructor() : initializer_list_{} { ... }
    #   Constructor{}.MemberFunction()
    #   Type variable{};
    #   FunctionCall(type{}, ...);
    #   LastArgument(..., type{});
    #   LOG(INFO) << type{} << " ...";
    #   map_of_type[{...}] = ...;
    #   ternary = expr ? new type{} : nullptr;
    #   OuterTemplate<InnerTemplateConstructor<Type>{}>
    #
    # We check for the character following the closing brace, and
    # silence the warning if it's one of those listed above, i.e.
    # "{.;,)<>]:".
    #
    # To account for nested initializer list, we allow any number of
    # closing braces up to "{;,)<".  We can't simply silence the
    # warning on first sight of closing brace, because that would
    # cause false negatives for things that are not initializer lists.
    #   Silence this:         But not this:
    #     Outer{                if (...) {
    #       Inner{...}            if (...){  // Missing space before {
    #     };                    }
    #
    # There is a false negative with this approach if people inserted
    # spurious semicolons, e.g. "if (cond){};", but we will catch the
    # spurious semicolon with a separate check.
    leading_text = match.group(1)
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(leading_text))
    trailing_text = ''
    if endpos > -1:
      trailing_text = endline[endpos:]
    # Append up to two following lines so that the character after the
    # closing brace is found even when it sits on a later line.  The builtin
    # range() is used rather than the Python-2-only xrange().
    for offset in range(endlinenum + 1,
                        min(endlinenum + 3, clean_lines.NumLines() - 1)):
      trailing_text += clean_lines.elided[offset]
    # We also suppress warnings for `uint64_t{expression}` etc., as the style
    # guide recommends brace initialization for integral types to avoid
    # overflow/truncation.
    if (not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text)
        and not _IsType(clean_lines, nesting_state, leading_text)):
      error(filename, linenum, 'whitespace/braces', 5,
            'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use {} instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use {} instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use {} instead.')
3639
3640
def IsDecltype(clean_lines, linenum, column):
  """Tell whether the token ending on (linenum, column) is decltype().

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: the number of the line to check.
    column: end column of the token to check.
  Returns:
    True if this token is decltype() expression, False otherwise.
  """
  text, _, start_col = ReverseCloseExpression(clean_lines, linenum, column)
  # A negative start column means no matching opening position was found.
  # Otherwise the text in front of that position has to end in "decltype".
  return start_col >= 0 and bool(Search(r'\bdecltype\s*$', text[:start_col]))
3657
3658
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks blank-line conventions around class access specifiers.

  Currently the only thing checked here is that public/protected/private is
  preceded by a blank line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo objects.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  class_start = class_info.starting_linenum

  # Nothing to do on (or before) the first line of the class, which covers
  #   class Foo { public: ... };
  # nor for small classes, i.e. those spanning 25 lines or less: that is the
  # usual terminal height, and a class that does not fit on one screen is
  # not really "small".  If the end of the class was not found, last_line
  # is zero and the size test skips the check as well.
  if linenum <= class_start or class_info.last_line - class_start <= 24:
    return

  lines = clean_lines.lines
  access = Match(r'\s*(public|protected|private):', lines[linenum])
  if not access:
    return

  # No complaint when the previous line is blank, when it mentions "class"
  # or "struct" (we are at the start of the class, or an inner class is being
  # forward-declared just above), or when it ends with a backslash, as is
  # common for classes defined inside C macros.
  previous = lines[linenum - 1]
  if (IsBlankLine(previous) or
      Search(r'\b(class|struct)\b', previous) or
      Search(r'\\$', previous)):
    return

  # The class head may span several lines, e.g.:
  #   class Derived
  #       : public Base {
  # so take the first line ending in "{" as the end of the head, falling
  # back to the first line of the class when there is none.
  head_end = next(
      (i for i in range(class_start, linenum)
       if Search(r'\{\s*$', lines[i])),
      class_start)
  if head_end < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % access.group(1))
3712
3713
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Find the closest non-blank line above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A tuple with two elements.  The first element is the contents of the last
    non-blank line before the current line, or the empty string if this is the
    first non-blank line.  The second is the line number of that line, or -1
    if this is the first non-blank line.
  """
  candidate = linenum - 1
  # Step upwards over blank lines until a non-blank one, or the top of the
  # file, is reached.
  while candidate >= 0 and IsBlankLine(clean_lines.elided[candidate]):
    candidate -= 1
  if candidate < 0:
    return ('', -1)
  return (clean_lines.elided[candidate], candidate)
3735
3736
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  The checks run on the elided form of the line (comments and strings
  removed) and cover:
    - an opening brace alone on its line,
    - an else that starts a line directly after a line holding only '}',
    - an else / else if that has a brace on one side but not the other,
    - an else or do with its statement on the same line,
    - brace-less if/else bodies that appear to contain more than one
      statement, including ambiguous nested if/else chains.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone is using
    # braces in a block to explicitly create a new scope, which is commonly used
    # to control the lifetime of stack-allocated variables.  Braces are also
    # used for brace initializers inside function calls.  We don't detect this
    # perfectly: we just don't complain if the last non-whitespace character on
    # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
    # previous line starts a preprocessor block. We also allow a brace on the
    # following line if it is part of an array initialization and would not fit
    # within the 80 character limit of the preceding line.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if (not Search(r'[,;:}{(]\s*$', prevline) and
        not Match(r'\s*#', prevline) and
        not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  if Match(r'\s*else\b\s*(?:if\b|\{|$)', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'else if\s*\(', line):       # could be multi-line if
    brace_on_left = bool(Search(r'}\s*else if\s*\(', line))
    # find the ( after the if
    pos = line.find('else if')
    pos = line.find('(', pos)
    if pos > 0:
      # Look for the opening brace in whatever follows the position that
      # CloseExpression returns for this parenthesis, on the line it returns.
      (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
      brace_on_right = endline[endpos:].find('{') != -1
      if brace_on_left != brace_on_right:    # must be brace after if
        error(filename, linenum, 'readability/braces', 5,
              'If an else has a brace on one side, it should have it on both')
  # Plain else: either "} else" with no '{' after it, or "else {" with no '}'
  # before it.
  elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    error(filename, linenum, 'readability/braces', 5,
          'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Check single-line if/else bodies. The style guide says 'curly braces are not
  # required for single-line statements'. We additionally allow multi-line,
  # single statements, but we reject anything with more than one semicolon in
  # it. This means that the first semicolon after the if should be at the end of
  # its line, and the line after that should have an indent level equal to or
  # lower than the if. We also check for ambiguous if/else nesting without
  # braces.
  if_else_match = Search(r'\b(if\s*\(|else\b)', line)
  # Lines starting with '#' (preprocessor directives) are not examined.
  if if_else_match and not Match(r'\s*#', line):
    if_indent = GetIndentLevel(line)
    # Default scan position: just past the matched "if (" or "else" on the
    # current line.  It is replaced below when the line contains an "if (".
    endline, endlinenum, endpos = line, linenum, if_else_match.end()
    if_match = Search(r'\bif\s*\(', line)
    if if_match:
      # This could be a multiline if condition, so find the end first.
      pos = if_match.end() - 1
      (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos)
    # Check for an opening brace, either directly after the if or on the next
    # line. If found, this isn't a single-statement conditional.
    if (not Match(r'\s*{', endline[endpos:])
        and not (Match(r'\s*$', endline[endpos:])
                 and endlinenum < (len(clean_lines.elided) - 1)
                 and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))):
      # Advance to the first line that contains a semicolon, looking at the
      # current line only from endpos onwards and at later lines in full.
      while (endlinenum < len(clean_lines.elided)
             and ';' not in clean_lines.elided[endlinenum][endpos:]):
        endlinenum += 1
        endpos = 0
      if endlinenum < len(clean_lines.elided):
        endline = clean_lines.elided[endlinenum]
        # We allow a mix of whitespace and closing braces (e.g. for one-liner
        # methods) and a single \ after the semicolon (for macros)
        # Note that find() returns the first ';' on the line, regardless of
        # the position the scan above started from.
        endpos = endline.find(';')
        if not Match(r';[\s}]*(\\?)$', endline[endpos:]):
          # Semicolon isn't the last character, there's something trailing.
          # Output a warning if the semicolon is not contained inside
          # a lambda expression.
          if not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$',
                       endline):
            error(filename, linenum, 'readability/braces', 4,
                  'If/else bodies with multiple statements require braces')
        elif endlinenum < len(clean_lines.elided) - 1:
          # Make sure the next line is dedented
          next_line = clean_lines.elided[endlinenum + 1]
          next_indent = GetIndentLevel(next_line)
          # With ambiguous nested if statements, this will error out on the
          # if that *doesn't* match the else, regardless of whether it's the
          # inner one or outer one.
          if (if_match and Match(r'\s*else\b', next_line)
              and next_indent != if_indent):
            error(filename, linenum, 'readability/braces', 4,
                  'Else clause should be indented at the same level as if. '
                  'Ambiguous nested if/else chains require braces.')
          elif next_indent > if_indent:
            # The line after the statement is indented deeper than the
            # if/else itself, so it is taken to be a second body statement.
            error(filename, linenum, 'readability/braces', 4,
                  'If/else bodies with multiple statements require braces')
3854
3855
def CheckTrailingSemicolon(filename, clean_lines, linenum, error):
  """Reports a semicolon that directly follows a block's closing brace.

  The opening brace is looked for on the line being checked; the warning is
  attached to the line holding the matching closing brace.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  text = clean_lines.elided[linenum]

  # With C++11 brace initialization, "};" is required in more places than
  # it is redundant, so the contexts where "};" should be just "}" are
  # enumerated explicitly:
  #
  #   1. A block following a closing parenthesis:
  #        for (;;) {};
  #        while (...) {};
  #        switch (...) {};
  #        Function(...) {};
  #        if (...) {};
  #        if (...) else if (...) {};
  #   2. An else block:
  #        if (...) else {};
  #   3. A const member function:
  #        Function(...) const {};
  #   4. A block following some statement:
  #        x = 42;
  #        {};
  #   5. A block at the beginning of a function:
  #        Function(...) {
  #          {};
  #        }
  #      Looking only at the preceding "{" also matches braces inside
  #      multi-dimensional arrays; that is harmless because such an
  #      expression contains no semicolons.
  #   6. A block following another block:
  #        while (true) {}
  #        {};
  #   7. End of a namespace:
  #        namespace {};
  #      These are far more common than the other kinds (possibly from
  #      classes being converted to namespaces) and are not reported.

  brace_match = Match(r'^(.*\)\s*)\{', text)
  if brace_match:
    # Case 1: the brace follows a closing parenthesis.  Inspect what
    # precedes the matching opening parenthesis and stay silent if it looks
    # like a macro, since a macro may define a base class (possibly across
    # several lines) or a whole class-head, where the semicolon is needed.
    #
    # A short list of macros is known to be safe to warn about:
    #   TEST, TEST_F, MATCHER, MATCHER_P, TYPED_TEST,
    #   EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED,
    #   INTERFACE_DEF
    # Listing the safe macros rather than the unsafe ones is deliberate: a
    # mistake here only leaves some extra semicolons in place, whereas a
    # mistake in an "unsafe" list would suggest edits that break the build.
    #
    # Besides macros, nothing is reported for compound literals, lambdas,
    # an alignas specifier on an anonymous struct/union, or decltype.
    opening = ReverseCloseExpression(
        clean_lines, linenum, brace_match.group(1).rfind(')'))
    paren_line, paren_linenum, paren_pos = opening[0], opening[1], opening[2]

    exempt = False
    if paren_pos > -1:
      before_paren = paren_line[0:paren_pos]
      macro = Search(r'\b([A-Z_][A-Z0-9_]*)\s*$', before_paren)
      bracketed = Match(r'^(.*\])\s*$', before_paren)
      if macro and macro.group(1) not in (
          'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
          'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
          'LOCKS_EXCLUDED', 'INTERFACE_DEF'):
        # Unknown all-caps identifier: treat it as a macro.
        exempt = True
      elif (bracketed and
            not Search(r'\boperator\s*\[\s*\]', bracketed.group(1))):
        # "...](...) {" that is not operator[]: a lambda.
        exempt = True
      elif (Search(r'\b(?:struct|union)\s+alignas\s*$', before_paren) or
            Search(r'\bdecltype$', before_paren) or
            Search(r'\s+=\s*$', before_paren)):
        exempt = True

    if (not exempt and
        paren_linenum > 1 and
        Search(r'\]\s*$', clean_lines.elided[paren_linenum - 1])):
      # Multi-line lambda-expression: the capture list ends the line before
      # the one holding the opening parenthesis.
      exempt = True

    if exempt:
      brace_match = None

  else:
    # Cases 2-3.
    brace_match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', text)
    if not brace_match:
      # Cases 4-6, which are always matched on separate lines.
      #
      # Joining the previous line with the current one and matching once is
      # not an option: it could produce duplicate warnings for the blank
      # line case:
      #   if (cond) {
      #     // blank line
      #   }
      previous = GetPreviousNonBlankLine(clean_lines, linenum)[0]
      if previous and Search(r'[;{}]\s*$', previous):
        brace_match = Match(r'^(\s*)\{', text)

  if not brace_match:
    return

  # Locate the closing brace paired with the opening brace found above.
  (endline, endlinenum, endpos) = CloseExpression(
      clean_lines, linenum, len(brace_match.group(1)))
  if endpos < 0 or not Match(r'^\s*;', endline[endpos:]):
    return

  # This {} pair is eligible for the check and is followed by a semicolon.
  #
  # Opening braces are scanned forward while warnings are emitted at the
  # matching closing brace, so nested blocks with trailing semicolons are
  # reported in reversed order.

  # NOLINT comments on the lines ahead have to be registered before the
  # error is raised for that later line.
  raw_lines = clean_lines.raw_lines
  for nolint_linenum in (endlinenum - 1, endlinenum):
    ParseNolintSuppressions(filename, raw_lines[nolint_linenum],
                            nolint_linenum, error)

  error(filename, endlinenum, 'readability/braces', 4,
        "You don't need a ; after a }")
4001
4002
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Look for empty loop/conditional body with only a single semicolon.

  Two situations are reported for a line starting with for/while/if:
    * the closing parenthesis of the condition is immediately followed by a
      semicolon ('whitespace/empty_loop_body' for loops,
      'whitespace/empty_conditional_body' for if);
    * an 'if' has a braced body that _EMPTY_CONDITIONAL_BODY_PATTERN accepts
      as empty (comments count as content) and no else clause follows
      ('whitespace/empty_if_body').

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Search for loop keywords at the beginning of the line.  Because only
  # whitespaces are allowed before the keywords, this will also ignore most
  # do-while-loops, since those lines should start with closing brace.
  #
  # We also check "if" blocks here, since an empty conditional block
  # is likely an error.
  line = clean_lines.elided[linenum]
  matched = Match(r'\s*(for|while|if)\s*\(', line)
  if not matched:
    return
  keyword = matched.group(1)

  # Find the end of the conditional expression.
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, line.find('('))
  if end_pos < 0:
    # Closing parenthesis not found; nothing can be checked.
    return

  # Output warning if what follows the condition expression is a semicolon.
  # No warning for all other cases, including whitespace or newline, since we
  # have a separate check for semicolons preceded by whitespace.
  if Match(r';', end_line[end_pos:]):
    if keyword == 'if':
      error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
            'Empty conditional bodies should use {}')
    else:
      error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
            'Empty loop bodies should use {} or continue')

  # The remaining check is for if statements that have completely empty
  # bodies (no comments) and no else clauses.
  if keyword != 'if':
    return

  # Find the position of the opening { for the if statement.
  # Return without logging an error if it has no brackets.
  opening_linenum = end_linenum
  opening_line_fragment = end_line[end_pos:]
  # Loop until EOF or find anything that's not whitespace or opening {.
  while not Search(r'^\s*\{', opening_line_fragment):
    if Search(r'^(?!\s*$)', opening_line_fragment):
      # Conditional has no brackets.
      return
    opening_linenum += 1
    if opening_linenum == len(clean_lines.elided):
      # Couldn't find conditional's opening { or any code before EOF.
      return
    opening_line_fragment = clean_lines.elided[opening_linenum]
  # Set opening_line (opening_line_fragment may not be entire opening line).
  opening_line = clean_lines.elided[opening_linenum]

  # Find the position of the closing }.
  opening_pos = opening_line_fragment.find('{')
  if opening_linenum == end_linenum:
    # We need to make opening_pos relative to the start of the entire line.
    opening_pos += end_pos
  (closing_line, closing_linenum, closing_pos) = CloseExpression(
      clean_lines, opening_linenum, opening_pos)
  if closing_pos < 0:
    return

  # Now construct the body of the conditional. This consists of the portion
  # of the opening line after the {, all lines until the closing line,
  # and the portion of the closing line before the }.
  if (clean_lines.raw_lines[opening_linenum] !=
      CleanseComments(clean_lines.raw_lines[opening_linenum])):
    # Opening line ends with a comment, so conditional isn't empty.
    return
  if closing_linenum > opening_linenum:
    # Opening line after the {. Ignore comments here since we checked above.
    # This must be a one-element list: list(some_string) would split the
    # text into individual characters before the join below.
    body_parts = [opening_line[opening_pos+1:]]
    # All lines until closing line, excluding closing line, with comments.
    body_parts.extend(
        clean_lines.raw_lines[opening_linenum+1:closing_linenum])
    # Closing line before the }. Won't (and can't) have comments.
    body_parts.append(clean_lines.elided[closing_linenum][:closing_pos-1])
    body = '\n'.join(body_parts)
  else:
    # If statement has brackets and fits on a single line.
    body = opening_line[opening_pos+1:closing_pos-1]

  # Check if the body is empty
  if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body):
    return
  # The body is empty. Now make sure there's not an else clause.
  current_linenum = closing_linenum
  current_line_fragment = closing_line[closing_pos:]
  # Loop until EOF or find anything that's not whitespace or else clause.
  while Search(r'^\s*$|^(?=\s*else)', current_line_fragment):
    if Search(r'^(?=\s*else)', current_line_fragment):
      # Found an else clause, so don't log an error.
      return
    current_linenum += 1
    if current_linenum == len(clean_lines.elided):
      break
    current_line_fragment = clean_lines.elided[current_linenum]

  # The body is empty and there's no else clause until EOF or other code.
  error(filename, end_linenum, 'whitespace/empty_if_body', 4,
        ('If statement had no body and no else clause'))
4105
4106
def FindCheckMacro(line):
  """Locates the first macro from _CHECK_MACROS that is invoked on a line.

  Args:
    line: line to search on.
  Returns:
    (macro name, start position), or (None, -1) if no replaceable
    macro is found.  The start position is the index of the macro's
    opening parenthesis.
  """
  for candidate in _CHECK_MACROS:
    if candidate not in line:
      continue
    # The plain substring test above is not enough: another macro may merely
    # contain this name.  Require a word boundary before the name and an
    # opening parenthesis after it.
    located = Match(r'^(.*\b' + candidate + r'\s*)\(', line)
    if located:
      return (candidate, len(located.group(1)))
  return (None, -1)
4128
4129
def CheckCheck(filename, clean_lines, linenum, error):
  """Checks the use of CHECK and EXPECT macros.

  When a macro found by FindCheckMacro wraps a single comparison
  (==, !=, >=, >, <=, <) in which at least one operand is a literal, the
  replacement listed in _CHECK_REPLACEMENT is suggested.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Decide the set of replacement macros that should be suggested
  lines = clean_lines.elided
  (check_macro, start_pos) = FindCheckMacro(lines[linenum])
  if not check_macro:
    return

  # Find end of the boolean expression by matching parentheses
  (last_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, start_pos)
  if end_pos < 0:
    return

  # If the check macro is followed by something other than a
  # semicolon, assume users will log their own custom error messages
  # and don't suggest any replacements.
  if not Match(r'\s*;', last_line[end_pos:]):
    return

  # Collect the text between the macro's parentheses, which may span
  # several lines.
  if linenum == end_linenum:
    expression = lines[linenum][start_pos + 1:end_pos - 1]
  else:
    pieces = [lines[linenum][start_pos + 1:]]
    pieces.extend(lines[linenum + 1:end_linenum])
    pieces.append(last_line[0:end_pos - 1])
    expression = ''.join(pieces)

  # Parse expression so that we can take parentheses into account.
  # This avoids false positives for inputs like "CHECK((a < 4) == b)",
  # which is not replaceable by CHECK_LE.
  lhs = ''
  rhs = ''
  relational_op = None
  while expression:
    # Longer operators are listed before their prefixes ("<<=" before "<<",
    # ">>=" before ">>") because regex alternation takes the first
    # alternative that matches.
    matched = Match(r'^\s*(<<=|<<|>>=|>>|->\*|->|&&|\|\||'
                    r'==|!=|>=|>|<=|<|\()(.*)$', expression)
    if matched:
      token = matched.group(1)
      if token == '(':
        # Parenthesized operand
        expression = matched.group(2)
        (end, _) = FindEndOfExpressionInLine(expression, 0, ['('])
        if end < 0:
          return  # Unmatched parenthesis
        lhs += '(' + expression[0:end]
        expression = expression[end:]
      elif token in ('&&', '||'):
        # Logical and/or operators.  This means the expression
        # contains more than one term, for example:
        #   CHECK(42 < a && a < b);
        #
        # These are not replaceable with CHECK_LE, so bail out early.
        return
      elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
        # Non-relational operator
        lhs += token
        expression = matched.group(2)
      else:
        # Relational operator
        relational_op = token
        rhs = matched.group(2)
        break
    else:
      # Unparenthesized operand.  Instead of appending to lhs one character
      # at a time, we do another regular expression match to consume several
      # characters at once if possible.  Trivial benchmark shows that this
      # is more efficient when the operands are longer than a single
      # character, which is generally the case.
      matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
      if not matched:
        matched = Match(r'^(\s*\S)(.*)$', expression)
        if not matched:
          break
      lhs += matched.group(1)
      expression = matched.group(2)

  # Only apply checks if we got all parts of the boolean expression
  if not (lhs and relational_op and rhs):
    return

  # Check that rhs do not contain logical operators.  We already know
  # that lhs is fine since the loop above parses out && and ||.
  if rhs.find('&&') > -1 or rhs.find('||') > -1:
    return

  # At least one of the operands must be a constant literal.  This is
  # to avoid suggesting replacements for unprintable things like
  # CHECK(variable != iterator)
  #
  # The following pattern matches decimal, hex integers, strings, and
  # characters (in that order).
  lhs = lhs.strip()
  rhs = rhs.strip()
  match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
  if Match(match_constant, lhs) or Match(match_constant, rhs):
    # Note: since we know both lhs and rhs, we can provide a more
    # descriptive error message like:
    #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
    # Instead of:
    #   Consider using CHECK_EQ instead of CHECK(a == b)
    #
    # We are still keeping the less descriptive message because if lhs
    # or rhs gets long, the error message might become unreadable.
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              _CHECK_REPLACEMENT[check_macro][relational_op],
              check_macro, relational_op))
4246
4247
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Reports alternative operator keywords found on a line.

  Every match of _ALT_TOKEN_REPLACEMENT_PATTERN produces one
  'readability/alt_tokens' error naming the operator to use instead.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Preprocessor lines are not checked.
  if Match(r'^\s*#', line):
    return

  # Best-effort guard against multi-line comments: skip any line holding a
  # comment delimiter.  A comment that started on an earlier line or ends on
  # a later one is not caught, but most false positives are, and it gives
  # people who put multi-line comments in preprocessor macros a workaround.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if '/*' in line or '*/' in line:
    return

  for token_match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    alt_token = token_match.group(1)
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[alt_token], alt_token))
4278
4279
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  For anything that is not a
    Unicode string, len(line) is returned.
  """
  # 'unicode' only exists on Python 2; on Python 3 every str is Unicode.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  # Issue 337
  # https://mail.python.org/pipermail/python-list/2012-August/628809.html
  # On narrow builds of Python <= 3.2 a character outside the BMP is stored
  # as a surrogate pair, so its low surrogate must not add a column.
  # https://github.com/python/cpython/blob/2.7/Include/unicodeobject.h#L81
  # https://github.com/python/cpython/blob/2.7/Objects/unicodeobject.c#L564
  # The answer does not depend on the character, so compute it once.
  skip_low_surrogates = (
      sys.version_info[:2] <= (3, 2) and
      (sysconfig.get_config_var("Py_UNICODE_SIZE") or 0) < 4)

  width = 0
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(uc):
      if skip_low_surrogates and 0xDC00 <= ord(uc) <= 0xDFFF:
        continue
      width += 1
  return width
4310
4311
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  The checks done directly here are: tabs, odd indentation (1 or 3 spaces),
  trailing whitespace, line length and multiple commands on one line.  The
  remaining checks are delegated to the Check* helpers called at the end.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings,
  raw_lines = clean_lines.lines_without_raw_strings
  line = raw_lines[linenum]
  prev = raw_lines[linenum - 1] if linenum > 0 else ''

  if line.find('\t') != -1:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  scope_or_label_pattern = r'\s*\w+\s*:\s*\\?$'
  cleansed_line = clean_lines.elided[linenum]
  # Number of leading space characters (tabs are not counted).
  initial_spaces = len(line) - len(line.lstrip(' '))
  # There are certain situations we allow one space, notably for
  # section labels, and also lines containing multi-line raw strings.
  # We also don't check for lines that look like continuation lines
  # (of lines ending in double quotes, commas, equals, or angle brackets)
  # because the rules for how to indent those are non-trivial.
  if (not Search(r'[",=><] *$', prev) and
      (initial_spaces == 1 or initial_spaces == 3) and
      not Match(scope_or_label_pattern, cleansed_line) and
      not (clean_lines.raw_lines[linenum] != line and
           Match(r'^\s*""', line))):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start.  '
          'Are you using a 2-space indent?')

  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace.  Consider deleting these extra spaces.')

  # Check if the line is a header guard.
  is_header_guard = False
  if IsHeaderExtension(file_extension):
    cppvar = GetHeaderGuardCPPVariable(filename)
    if (line.startswith('#ifndef %s' % cppvar) or
        line.startswith('#define %s' % cppvar) or
        line.startswith('#endif  // %s' % cppvar)):
      is_header_guard = True
  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developers fault.
  if (not line.startswith('#include') and not is_header_guard and
      not Match(r'^\s*//.*http(s?)://\S*$', line) and
      not Match(r'^\s*//\s*[^\s]*$', line) and
      not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
    line_width = GetLineWidth(line)
    if line_width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  # More than one ';' on a line normally means several commands, except:
  #  - for loops, which are allowed two ;'s (and may run over two lines);
  #  - a switch case that fits in 1 line, where many commands are ok.
  if cleansed_line.count(';') > 1 and cleansed_line.find('for') == -1:
    # Looked up once and reused for both tests below.
    prev_code_line = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    continues_for_header = (prev_code_line.find('for') != -1 and
                            prev_code_line.find(';') == -1)
    one_line_case = ((cleansed_line.find('case ') != -1 or
                      cleansed_line.find('default:') != -1) and
                     cleansed_line.find('break;') != -1)
    if not continues_for_header and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 0,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckTrailingSemicolon(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckOperatorSpacing(filename, clean_lines, linenum, error)
  CheckParenthesisSpacing(filename, clean_lines, linenum, error)
  CheckCommaSpacing(filename, clean_lines, linenum, error)
  CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacingForFunctionCall(filename, clean_lines, linenum, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  # Section spacing only applies when the line is inside a class.
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
4429
4430
# Matches a preprocessor #include line.  Group 1 is the opening delimiter
# (< or ") and group 2 is the text between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s (the
# match also stops at the first '.'). That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
4438
4439
4440def _DropCommonSuffixes(filename):
4441  """Drops common suffixes like _test.cc or -inl.h from filename.
4442
4443  For example:
4444    >>> _DropCommonSuffixes('foo/foo-inl.h')
4445    'foo/foo'
4446    >>> _DropCommonSuffixes('foo/bar/foo.cc')
4447    'foo/bar/foo'
4448    >>> _DropCommonSuffixes('foo/foo_internal.h')
4449    'foo/foo'
4450    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
4451    'foo/foo_unusualinternal'
4452
4453  Args:
4454    filename: The input filename.
4455
4456  Returns:
4457    The filename with the common suffix removed.
4458  """
4459  for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
4460                 'inl.h', 'impl.h', 'internal.h'):
4461    if (filename.endswith(suffix) and len(filename) > len(suffix) and
4462        filename[-len(suffix) - 1] in ('-', '_')):
4463      return filename[:-len(suffix) - 1]
4464  return os.path.splitext(filename)[0]
4465
4466
def _ClassifyInclude(fileinfo, include, is_system):
  """Decides which header category an #include belongs to.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # Angle-bracket includes are split into C++ and C system headers purely
  # by membership in _CPP_HEADERS.
  listed_as_cpp = include in _CPP_HEADERS
  if is_system:
    return _CPP_SYS_HEADER if listed_as_cpp else _C_SYS_HEADER

  # Compare the file being linted and the include after common suffixes
  # have been dropped from both.
  target_dir, target_base = os.path.split(
      _DropCommonSuffixes(fileinfo.RepositoryName()))
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))

  # Same basename, located either in the target's own directory or in the
  # sibling "public" directory: the header is likely owned by the target.
  if target_base == include_base:
    if (include_dir == target_dir or
        include_dir == os.path.normpath(target_dir + '/../public')):
      return _LIKELY_MY_HEADER

  # Sharing only the first basename component means the target possibly
  # implements the include.  Such a header is allowed to come first, but
  # no complaint is ever raised if it does not.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if target_head and include_head:
    if target_head.group(0) == include_head.group(0):
      return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
4524
4525
4526
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Applies the #include-specific rules to one line.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # Quoted includes should name the directory ("foo/bar.h", not "bar.h").
  # Headers matching _THIRD_PARTY_HEADERS_PATTERN are exempt: they are
  # assumed to belong to a 3rd party API with its own include conventions.
  # (Lua headers are one such exception: they follow google naming
  # convention but not the include convention.)
  bare_header = Match(r'#include\s*"([^/]+\.h)"', line)
  if (bare_header and
      not _THIRD_PARTY_HEADERS_PATTERN.match(bare_header.group(1))):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  include_match = _RE_PATTERN_INCLUDE.search(line)
  if not include_match:
    return
  include = include_match.group(2)
  is_system = (include_match.group(1) == '<')

  # A file should not be included more than once.  There are a handful of
  # cases where that is okay, but in general it is not.
  first_seen_line = include_state.FindHeader(include)
  if first_seen_line >= 0:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, first_seen_line))
    return

  if (include.endswith('.cc') and
      os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)):
    error(filename, linenum, 'build/include', 4,
          'Do not include .cc files from other packages')
    return

  if _THIRD_PARTY_HEADERS_PATTERN.match(include):
    # Third-party headers are neither recorded nor order-checked.
    return

  include_state.include_list[-1].append((include, linenum))

  # Headers are expected in this order:
  # 1) for foo.cc, foo.h  (preferred location)
  # 2) c system files
  # 3) cpp system files
  # 4) for foo.cc, foo.h  (deprecated location)
  # 5) other google headers
  #
  # Each include is classified as one of those 5 types.  The include_state
  # object keeps track of the highest type seen, and complains if a lower
  # type shows up after that.
  header_kind = _ClassifyInclude(fileinfo, include, is_system)
  order_problem = include_state.CheckNextIncludeOrder(header_kind)
  if order_problem:
    error(filename, linenum, 'build/include_order', 4,
          '%s. Should be: %s.h, c system, c++ system, other.' %
          (order_problem, fileinfo.BaseName()))

  canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
  in_order = include_state.IsInAlphabeticalOrder(
      clean_lines, linenum, canonical_include)
  if not in_order:
    error(filename, linenum, 'build/include_alpha', 4,
          'Include "%s" not in alphabetical order' % include)
  include_state.SetLastHeader(canonical_include)
4598
4599
4600
def _GetTextInside(text, start_pattern):
  r"""Retrieves all the text between matching open and close parentheses.

  Given a string of lines and a regular expression string, retrieve all the text
  following the expression and between opening punctuation symbols like
  (, [, or {, and the matching close-punctuation symbol. This properly handles
  nested occurrences of the punctuations, so for the text like
    printf(a(), b(c()));
  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
  start_pattern must match a string having an open punctuation symbol at the
  end.

  Args:
    text: The lines to extract text. Its comments and strings must be elided.
           It can be single line and can span multiple lines.
    start_pattern: The regexp string indicating where to start extracting
                   the text.
  Returns:
    The extracted text.
    None if either the opening string or ending punctuation could not be found,
    or if a closing punctuation shows up that does not pair with the most
    recently opened one.
  """
  # TODO(unknown): Audit cpplint.py to see what places could be profitably
  # rewritten to use _GetTextInside (and use inferior regexp matching today).

  # Give opening punctuations to get the matching close-punctuations.
  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
  # values() is available on both Python 2 and Python 3 dictionaries, whereas
  # itervalues() only exists on Python 2.
  closing_punctuation = set(matching_punctuation.values())

  # Find the position to start extracting text.
  match = re.search(start_pattern, text, re.M)
  if not match:  # start_pattern not found in text.
    return None
  start_position = match.end(0)

  assert start_position > 0, (
      'start_pattern must ends with an opening punctuation.')
  assert text[start_position - 1] in matching_punctuation, (
      'start_pattern must ends with an opening punctuation.')
  # Stack of closing punctuations we expect to have in text after position.
  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
  position = start_position
  while punctuation_stack and position < len(text):
    if text[position] == punctuation_stack[-1]:
      punctuation_stack.pop()
    elif text[position] in closing_punctuation:
      # A closing punctuation without matching opening punctuations.
      return None
    elif text[position] in matching_punctuation:
      punctuation_stack.append(matching_punctuation[text[position]])
    position += 1
  if punctuation_stack:
    # Opening punctuations left without matching close-punctuations.
    return None
  # punctuations match.  position is one past the final closing symbol, so
  # drop that symbol from the returned slice.
  return text[start_position:position - 1]
4655
4656
# Regular expressions used to recognize call-by-reference parameters.
#
# Template argument lists may nest up to 2 levels deep.  The type pattern
# spells that nesting out by hand, which reads like this when expanded:
#   < (?: < (?: < [^<>]*
#               >
#           |   [^<>] )*
#         >
#     |   [^<>] )*
#   >
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
_RE_PATTERN_TYPE = ''.join([
    # Optional leading cv-qualifier and elaborated-type keyword.
    r'(?:const\s+)?',
    r'(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?',
    # One or more of: word character, template argument list, scope operator.
    r'(?:\w|\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|::)+',
])
# A call-by-reference parameter ends with '& identifier'.  Group 1 captures
# the type together with the identifier; an optional default value and the
# ',' or ')' that terminates the parameter are matched but not captured.
_RE_PATTERN_REF_PARAM = re.compile(
    r'(%s(?:\s*(?:\bconst\b|[*]))*\s*&\s*%s)\s*(?:=[^,()]+)?[,)]'
    % (_RE_PATTERN_TYPE, _RE_PATTERN_IDENT))
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
_RE_PATTERN_CONST_REF_PARAM = (
    r'(?:.*\s*\bconst\s*&\s*%(ident)s|const\s+%(type)s\s*&\s*%(ident)s)'
    % {'ident': _RE_PATTERN_IDENT, 'type': _RE_PATTERN_TYPE})
# Stream types.
_RE_PATTERN_REF_STREAM_PARAM = r'(?:.*stream\s*&\s*%s)' % _RE_PATTERN_IDENT
4684
4685
def CheckLanguage(filename, clean_lines, linenum, file_extension,
                  include_state, nesting_state, error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  Include lines are handed to CheckIncludeLine and get no further checks.
  Every other non-empty line goes through the cast, global/static and printf
  checks, followed by the single-line checks implemented directly here
  (C integer types, unary operator&, "} if", format strings, memset argument
  order, using-directives, variable-length arrays and unnamed namespaces in
  headers).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.  It is
                   accepted for interface compatibility and is not read here.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # Reset include state across preprocessor directives.  This is meant
  # to silence warnings for conditional includes.
  match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line)
  if match:
    include_state.ResetSection(match.group(1))

  # Perform other checks now that we are sure that this is not an include line
  CheckCasts(filename, clean_lines, linenum, error)
  CheckGlobalStatic(filename, clean_lines, linenum, error)
  CheckPrintf(filename, clean_lines, linenum, error)

  # Header-only checks that are not implemented yet:
  # TODO(unknown): check that 1-arg constructors are explicit.
  #                How to tell it's a constructor?
  #                (handled in CheckForNonStandardConstructs for now)
  # TODO(unknown): check that classes declare or disable copy/assign
  #                (level 1 error)

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous.  Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  # TODO(unknown): Catch the following case. Need to change the calling
  # convention of the whole function to process multiple line to handle it.
  #   printf(
  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
  if printf_args:
    match = Match(r'([\w.\->()]+)$', printf_args)
    if match and match.group(1) != '__VA_ARGS__':
      function_name = re.search(r'\b((?:string)?printf)\s*\(',
                                line, re.I).group(1)
      error(filename, linenum, 'runtime/printf', 4,
            'Potential format string bug. Do %s("%%s", %s) instead.'
            % (function_name, match.group(1)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  # The second argument is only considered harmless when it is, in its
  # entirety, an (elided) character literal or an integer literal.  The
  # alternatives are grouped and anchored on both ends so that an expression
  # which merely starts with a digit, e.g. "10 * n", is still reported.
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  if match and not Match(
      r"^(?:''|-?(?:0[xX][0-9A-Fa-f]+|[0-9]+)[uUlL]*)\s*$", match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives.  '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays.  Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (IsHeaderExtension(file_extension)
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files.  See '
          'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
4842
4843
def CheckGlobalStatic(filename, clean_lines, linenum, error):
  """Flags static/global string objects and self-initialized members.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]

  # A line without ';', '(' or '{' may be a declaration that continues on
  # the next line, so look at both lines together when a next line exists.
  has_following_line = linenum + 1 < clean_lines.NumLines()
  if has_following_line and not Search(r'[;({]', text):
    text = text + clean_lines.elided[linenum + 1].strip()

  # Static/global STL strings at the top level are dangerous: the C++
  # language does not guarantee that globals with constructors are
  # initialized before the first access, and globals can be destroyed
  # while some threads are still running.
  # TODO(unknown): Generalize this to also find static unique_ptr instances.
  # TODO(unknown): File bugs for clang-tidy to find these.
  declaration = Match(
      r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +'
      r'([a-zA-Z0-9_:]+)\b(.*)',
      text)

  # Things that look like a string object but are not:
  # - Pointers/references to strings:
  #    string *pointer
  #    const string *pointer
  #    string const *pointer
  #    string *const pointer
  # - Functions and template specializations:
  #    string Function<Type>(...
  #    string Class<Type>::Method(...
  # - Operators, tested on their own because operator names cross
  #   non-word boundaries and folding them into the function test would
  #   make identifier matching less accurate:
  #    string Class::operator*()
  if declaration and not (
      Search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', text) or
      Search(r'\boperator\W', text) or
      Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)',
            declaration.group(4))):
    if not Search(r'\bconst\b', text):
      error(filename, linenum, 'runtime/string', 4,
            'Static/global string variables are not permitted.')
    else:
      qualifiers = declaration.group(1)
      trailing_const = declaration.group(2) or ''
      identifier = declaration.group(3)
      error(filename, linenum, 'runtime/string', 4,
            'For a static/global string constant, use a C style string '
            'instead: "%schar%s %s[]".' %
            (qualifiers, trailing_const, identifier))

  # member_(member_) or member_(CHECK_NOTNULL(member_)).
  self_init_patterns = (
      r'\b([A-Za-z0-9_]*_)\(\1\)',
      r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)',
  )
  if any(Search(pattern, text) for pattern in self_init_patterns):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')
4903
4904
def CheckPrintf(filename, clean_lines, linenum, error):
  """Reports risky uses of the printf/str* family on one line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]

  # snprintf with a numeric literal as its size argument.  A size of zero
  # is left alone: snprintf is then being used to calculate the size.
  sized_call = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', text)
  if sized_call:
    buffer_arg, size_arg = sized_call.group(1), sized_call.group(2)
    if size_arg != '0':
      error(filename, linenum, 'runtime/printf', 3,
            'If you can, use sizeof(%s) instead of %s as the 2nd arg '
            'to snprintf.' % (buffer_arg, size_arg))

  # Verboten C functions.
  if Search(r'\bsprintf\s*\(', text):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf. Use snprintf instead.')

  unbounded_copy = Search(r'\b(strcpy|strcat)\s*\(', text)
  if unbounded_copy:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' %
          unbounded_copy.group(1))
4932
4933
def IsDerivedFunction(clean_lines, linenum):
  """Tells whether the function around this line is marked "override".

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    A truthy value if the nearest function start found within the scanned
    lines is followed, after its closing parenthesis, by the "override"
    virt-specifier; a falsy value otherwise.
  """
  # Walk backwards over at most ten lines (never past line 0), looking for
  # a line that begins a function: some text without parentheses, ending
  # in a word, directly followed by '('.
  lower_bound = max(-1, linenum - 10)
  row = linenum
  while row > lower_bound:
    function_start = Match(r'^([^()]*\w+)\(', clean_lines.elided[row])
    if not function_start:
      row -= 1
      continue

    # Only the first function start found is examined.
    closing_line, _, closing_col = CloseExpression(
        clean_lines, row, len(function_start.group(1)))
    if closing_col < 0:
      return False
    return Search(r'\boverride\b', closing_line[closing_col:])
  return False
4954
4955
def IsOutOfLineMethodDefinition(clean_lines, linenum):
  """Tells whether the function around this line is written as Class::Method.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    True if the nearest function start found within the scanned lines has a
    '::'-qualified name directly before its opening parenthesis, False
    otherwise (including when no function start is found).
  """
  # Look at the current line and up to nine lines before it, stopping at
  # the first one that starts a function.
  lower_bound = max(-1, linenum - 10)
  for distance in xrange(linenum - lower_bound):
    candidate = clean_lines.elided[linenum - distance]
    if Match(r'^([^()]*\w+)\(', candidate):
      qualified = Match(r'^[^()]*\w+::\w+\(', candidate)
      return qualified is not None
  return False
4970
4971
def IsInitializerList(clean_lines, linenum):
  """Guesses whether a line sits inside a constructor initializer list.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    True if current line appears to be inside constructor initializer
    list, False otherwise.
  """
  row = linenum
  # Scan upwards from the current line; indices 1 and 0 are never examined.
  while row > 1:
    text = clean_lines.elided[row]
    if row == linenum:
      # On the starting line only, ignore a trailing '{' that opens the
      # function body.
      opens_body = Match(r'^(.*)\{\s*$', text)
      if opens_body:
        text = opens_body.group(1)

    # Evidence for an initializer list:
    # - A lone colon followed by "name(" or "name{".  It could also be a
    #   ternary operator, which also tends to appear in constructor
    #   initializer lists as opposed to parameter lists.
    # - A closing brace followed by a comma at the end of the line, which
    #   is probably the end of a brace-initialized member.
    if (Search(r'\s:\s*\w+[({]', text) or
        Search(r'\}\s*,\s*$', text)):
      return True

    # Evidence against: the line ends with a closing brace or semicolon
    # (probably the end of the previous function) or an opening brace
    # (probably the start of the current class or namespace), and no
    # starting colon has been seen so far.
    if Search(r'[{};]\s*$', text):
      return False

    row -= 1

  # Ran out of lines without seeing the start of a constructor
  # initializer list.
  return False
5012
5013
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references.

  Separate from CheckLanguage since it scans backwards from current
  line, instead of scanning forward.

  Nothing is reported for lines that belong to an "override" function, an
  out-of-line method definition, a function body, a constructor initializer
  list, a preprocessor continuation line, or a call to one of the allowed
  functions (swap, stream operators, static_assert/COMPILE_ASSERT).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Do nothing if there is no '&' on current line.
  line = clean_lines.elided[linenum]
  if '&' not in line:
    return

  # If a function is inherited, current function doesn't have much of
  # a choice, so any non-const references should not be blamed on
  # derived function.
  if IsDerivedFunction(clean_lines, linenum):
    return

  # Don't warn on out-of-line method definitions, as we would warn on the
  # in-line declaration, if it isn't marked with 'override'.
  if IsOutOfLineMethodDefinition(clean_lines, linenum):
    return

  # Long type names may be broken across multiple lines, usually in one
  # of these forms:
  #   LongType
  #       ::LongTypeContinued &identifier
  #   LongType::
  #       LongTypeContinued &identifier
  #   LongType<
  #       ...>::LongTypeContinued &identifier
  #
  # If we detected a type split across two lines, join the previous
  # line to current line so that we can match const references
  # accordingly.
  #
  # Note that this only scans back one line, since scanning back
  # arbitrary number of lines would be expensive.  If you have a type
  # that spans more than 2 lines, please use a typedef.
  if linenum > 1:
    previous = None
    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
                        clean_lines.elided[linenum - 1])
    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
                        clean_lines.elided[linenum - 1])
    if previous:
      line = previous.group(1) + line.lstrip()
    else:
      # Check for templated parameter that is split across multiple lines
      endpos = line.rfind('>')
      if endpos > -1:
        (_, startline, startpos) = ReverseCloseExpression(
            clean_lines, linenum, endpos)
        if startpos > -1 and startline < linenum:
          # Found the matching < on an earlier line, collect all
          # pieces up to current line.
          line = ''.join(clean_lines.elided[i].strip()
                         for i in xrange(startline, linenum + 1))

  # Check for non-const references in function parameters.  A single '&' may
  # be found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  if (nesting_state.previous_stack_top and
      not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or
           isinstance(nesting_state.previous_stack_top, _NamespaceInfo))):
    # Not at toplevel, not within a class, and not within a namespace
    return

  # Avoid initializer lists.  We only need to scan back from the
  # current line for something that starts with ':'.
  #
  # We don't need to check the current line, since the '&' would
  # appear inside the second set of parentheses on the current line as
  # opposed to the first set.
  if linenum > 0:
    for i in xrange(linenum - 1, max(0, linenum - 10), -1):
      previous_line = clean_lines.elided[i]
      if not Search(r'[),]\s*$', previous_line):
        break
      if Match(r'^\s*:\s+\S', previous_line):
        return

  # Avoid preprocessors
  if Search(r'\\\s*$', line):
    return

  # Avoid constructor initializer lists
  if IsInitializerList(clean_lines, linenum):
    return

  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>".  Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  #
  # The optional "<...>" after swap accepts one, possibly namespace
  # qualified, template argument such as swap<std::string>(.
  allowed_functions = (r'(?:[sS]wap(?:<[\w:]+>)?|'
                       r'operator\s*[<>][<>]|'
                       r'static_assert|COMPILE_ASSERT'
                       r')\s*\(')
  if Search(allowed_functions, line):
    return
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see an allowed function on this line.  Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list.  Try a bit harder to catch this case.
    for i in xrange(2):
      if (linenum > i and
          Search(allowed_functions, clean_lines.elided[linenum - i - 1])):
        return

  decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
  for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
    if (not Match(_RE_PATTERN_CONST_REF_PARAM, parameter) and
        not Match(_RE_PATTERN_REF_STREAM_PARAM, parameter)):
      error(filename, linenum, 'runtime/references', 2,
            'Is this a non-const reference? '
            'If so, make const or use a pointer: ' +
            ReplaceAll(' *<', '<', parameter))
5151
5152
def CheckCasts(filename, clean_lines, linenum, error):
  """Runs the cast-related checks on one line.

  Covers function-style casts of basic types, C-style casts, and taking the
  address of a cast expression.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]

  # Function-style casts such as int(x).  Only the most common basic types
  # are covered.  Parameterless forms, such as bool(), are allowed as they
  # are probably a member operator declaration or default constructor.
  function_cast = Search(
      r'(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b'
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
      r'(\([^)].*)', text)
  wants_function_type = ExpectingFunctionArgs(clean_lines, linenum)
  if function_cast and not wants_function_type:
    # new_or_template is set when the type follows "new" or directly
    # follows an opening angle bracket.  Both silence false positives:
    #
    #   function<double(double)>   // bracket + no space = false positive
    #   value < double(42)         // bracket + space = true positive
    #
    # Matching only types that immediately follow '<' is a fast way to
    # cover the common case where the function type is the first template
    # argument; less-than comparisons are usually followed by a space.
    new_or_template, cast_type, after_type = function_cast.groups()

    # Brackets right after the closing parenthesis mean an array; no
    # further cast checks are made for this line.
    if Match(r'\([^()]+\)\s*\[', after_type):
      return

    # Also not casts:
    # - Function pointers
    # - Casts to pointer types
    # - Alias declarations
    # - Placement new
    not_a_cast = (
        new_or_template is not None or
        (after_type and
         (Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
                after_type) or
          after_type.startswith('(*)'))) or
        Match(r'\s*using\s+\S+\s*=\s*' + cast_type, text) or
        Search(r'new\(\S+\)\s*' + cast_type, text))
    if not not_a_cast:
      error(filename, linenum, 'readability/casting', 4,
            'Using deprecated casting style.  '
            'Use static_cast<%s>(...) instead' %
            cast_type)

  if not wants_function_type:
    CheckCStyleCast(filename, clean_lines, linenum, 'static_cast',
                    r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)

  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
  # compile).  This doesn't catch all cases, e.g. (const char * const)"hello".
  # Other pointer casts are only looked at when no const_cast was reported.
  reported_const_cast = CheckCStyleCast(
      filename, clean_lines, linenum, 'const_cast',
      r'\((char\s?\*+\s?)\)\s*"', error)
  if not reported_const_cast:
    CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast',
                    r'\((\w+\s?\*+\s?)\)', error)

  # Taking the address of a cast is dangerous -- casts can assign to
  # temporaries, so the pointer doesn't point where you think.
  #
  # Some non-identifier character is required before the '&' for the
  # expression to be recognized as a cast.  These are casts:
  #   expression = &static_cast<int*>(temporary());
  #   function(&(int*)(temporary()));
  #
  # This is not a cast:
  #   reference_type&(int* function_param);
  address_of_cast = Search(
      r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|'
      r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', text)
  if not address_of_cast:
    return

  # A better message is possible when the & is bound to something
  # dereferenced by the casted pointer, as opposed to the casted pointer
  # itself: skip over the <...> and (...) of the named cast and see whether
  # '->' or '[' comes next (possibly on the following line).
  binds_to_dereference = False
  named_cast = Match(
      r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', text)
  if named_cast:
    _, args_row, args_col = CloseExpression(
        clean_lines, linenum, len(named_cast.group(1)))
    if args_col >= 0 and clean_lines.elided[args_row][args_col] == '(':
      _, end_row, end_col = CloseExpression(clean_lines, args_row, args_col)
      if end_col >= 0:
        trailing = clean_lines.elided[end_row][end_col:]
        if end_row < clean_lines.NumLines() - 1:
          trailing += clean_lines.elided[end_row + 1]
        if Match(r'\s*(?:->|\[)', trailing):
          binds_to_dereference = True

  if binds_to_dereference:
    error(filename, linenum, 'readability/casting', 4,
          ('Are you taking an address of something dereferenced '
           'from a cast?  Wrapping the dereferenced expression in '
           'parentheses will make the binding more obvious'))
  else:
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast?  '
           'This is dangerous: could be a temp var.  '
           'Take the address before doing the cast, rather than after'))
5270
5271
def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error):
  """Reports a C-style cast if the pattern finds one on the line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast, static_cast, or const_cast, depending.
    pattern: The regular expression used to find C-style casts.  Its first
      group is the type named in the cast.
    error: The function to call with any errors found.

  Returns:
    True if an error was emitted.
    False otherwise.
  """
  text = clean_lines.elided[linenum]
  found = Search(pattern, text)
  if not found:
    return False

  # Everything on the line before the parenthesis that opens the cast.
  before = text[0:found.start(1) - 1]

  # Keywords and all-caps macro names that tend to look like casts when
  # followed by a parenthesized type.
  if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', before):
    return False

  # Widen the context with up to four preceding lines (index 0 is never
  # included) to see whether we are one level of parentheses inside a
  # macro invocation.
  first_row = max(0, linenum - 5) + 1
  before = ''.join(
      [clean_lines.elided[row] for row in xrange(first_row, linenum)]
  ) + before
  if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', before):
    return False

  # operator++(int) and operator--(int)
  if before.endswith((' operator++', ' operator--')):
    return False

  # A single unnamed argument for a function tends to look like old style
  # cast.  If we see those, don't issue warnings for deprecated casts.
  after = text[found.end(0):]
  if Match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)',
           after):
    return False

  # At this point, all that should be left is actual casts.
  error(filename, linenum, 'readability/casting', 4,
        'Using C-style cast.  Use %s<%s>(...) instead' %
        (cast_type, found.group(1)))
  return True
5323
5324
def ExpectingFunctionArgs(clean_lines, linenum):
  """Checks whether function type arguments are expected at this line.

  That is the case when the line itself starts a MOCK_METHOD-style macro,
  when one of the two preceding lines ends with the opening of such a
  macro, or when the preceding line ends with "std::function<" (or
  "std::mfunction<").

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.

  Returns:
    A true value if the line at 'linenum' is inside something that expects
    arguments of function types, a false value otherwise.
  """
  mock_here = Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(',
                    clean_lines.elided[linenum])
  if mock_here:
    return mock_here

  # The remaining checks look at the two preceding lines.
  if linenum < 2:
    return False

  previous = clean_lines.elided[linenum - 1]
  return (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
                previous) or
          Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
                clean_lines.elided[linenum - 2]) or
          Search(r'\bstd::m?function\s*\<\s*$', previous))
5345
5346
# (header, template names) pairs: using one of the templates with explicit
# template arguments ("name<") is a reason to include the header.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', ('unary_function', 'binary_function',
                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
                      'negate',
                      'equal_to', 'not_equal_to', 'greater', 'less',
                      'greater_equal', 'less_equal',
                      'logical_and', 'logical_or', 'logical_not',
                      'unary_negate', 'not1', 'binary_negate', 'not2',
                      'bind1st', 'bind2nd',
                      'pointer_to_unary_function',
                      'pointer_to_binary_function',
                      'ptr_fun',
                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
                      'mem_fun_ref_t',
                      'const_mem_fun_t', 'const_mem_fun1_t',
                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
                      'mem_fun_ref',
                     )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator', 'make_shared', 'make_unique', 'shared_ptr',
                  'unique_ptr', 'weak_ptr')),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<tuple>', ('tuple',)),
    ('<unordered_map>', ('unordered_map', 'unordered_multimap')),
    ('<unordered_set>', ('unordered_set', 'unordered_multiset')),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )

# (header, function names) pairs for functions that are usually called
# without explicit template arguments, so a plain call also counts as a use.
_HEADERS_MAYBE_TEMPLATES = (
    ('<algorithm>', ('copy', 'max', 'min', 'min_element', 'sort',
                     'transform',
                    )),
    ('<utility>', ('forward', 'make_pair', 'move', 'swap')),
    )

_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# List of (compiled pattern, function name, header) tuples.
_re_pattern_headers_maybe_templates = []
for _header, _templates in _HEADERS_MAYBE_TEMPLATES:
  for _template in _templates:
    # Match max<type>(..., ...), max(..., ...) and std::max(..., ...), but
    # not foo->max, foo.max or type::max().  A name qualified by anything
    # other than std:: belongs to some other namespace or class, so it is
    # not a reason to include the standard header.
    _re_pattern_headers_maybe_templates.append(
        (re.compile(r'(?:\bstd::|[^>.:])\b' + _template +
                    r'(<.*?>)?\([^\)]'),
         _template,
         _header))

# List of (compiled pattern, 'name<>', header) tuples.
# Other scripts may reach in and modify this pattern.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _re_pattern_templates.append(
        (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
         _template + '<>',
         _header))
5415
5416
def FilesBelongToSameModule(filename_cc, filename_h):
  """Check if these two filenames belong to the same module.

  The concept of a 'module' here is as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  If the filename_cc contains a longer path than the filename_h, for example,
  '/absolute/path/to/base/sysinfo.cc', and this file would include
  'base/sysinfo.h', this function also produces the prefix needed to open the
  header. This is used by the caller of this function to more robustly open the
  header file. We don't have access to the real include paths in this context,
  so we need this guesswork here.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  source_info = FileInfo(filename_cc)
  if not source_info.IsSource():
    return (False, '')

  # Reduce the source path to its module stem: drop the extension, then any
  # test suffix, then the public/internal directory components.
  stem_cc = filename_cc[:-len(source_info.Extension())]
  test_suffix = Search(_TEST_FILE_SUFFIX, source_info.BaseName())
  if test_suffix:
    stem_cc = stem_cc[:-len(test_suffix.group(1))]
  stem_cc = stem_cc.replace('/public/', '/').replace('/internal/', '/')

  # Same reduction for the header: drop '.h', an optional '-inl', and the
  # public/internal directory components.
  if not filename_h.endswith('.h'):
    return (False, '')
  stem_h = filename_h[:-len('.h')]
  if stem_h.endswith('-inl'):
    stem_h = stem_h[:-len('-inl')]
  stem_h = stem_h.replace('/public/', '/').replace('/internal/', '/')

  # The header stem must be a suffix of the source stem; whatever precedes
  # it in the source path is the prefix needed to open the header.
  if stem_cc.endswith(stem_h):
    return True, stem_cc[:-len(stem_h)]
  return False, ''
5470
5471
def UpdateIncludeState(filename, include_dict, io=codecs):
  """Fill up the include_dict with new includes found from the file.

  Every include found in the file is recorded with the 1-based number of
  the line it appears on.  Includes that are already present in
  include_dict keep their existing value.

  Args:
    filename: the name of the header to read.
    include_dict: a dictionary in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if a header was successfully added. False otherwise (the file
    could not be opened).
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False

  try:
    for linenum, line in enumerate(headerfile, 1):
      match = _RE_PATTERN_INCLUDE.search(CleanseComments(line))
      if match:
        include_dict.setdefault(match.group(2), linenum)
  finally:
    # Release the file handle.  The io factory is injectable and may hand
    # back a plain iterable of lines, so only close what can be closed.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
5497
5498
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports for missing stl includes.

  This function will output warnings to make sure you are including the headers
  necessary for the stl containers and functions that you use. We only give one
  reason to include a header. For example, if you use both equal_to<> and
  less<> in a .h file, only one (the latter in the file) of these will be
  reported as a reason to include the <functional>.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  required = {}  # A map of header name to linenumber and the template entity.
                 # Example of required: { '<functional>': (1219, 'less<>') }

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    matched = _RE_PATTERN_STRING.search(line)
    if matched:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      prefix = line[:matched.start()]
      if prefix.endswith('std::') or not prefix.endswith('::'):
        required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_headers_maybe_templates:
      if pattern.search(line):
        required[header] = (linenum, template)

    # The following check is just a speed up, no semantics are changed:
    # every pattern below needs a '<' somewhere on the line.
    if '<' not in line:
      continue

    for pattern, template, header in _re_pattern_templates:
      matched = pattern.search(line)
      if matched:
        # Don't warn about IWYU in non-STL namespaces:
        # (We check only the first match per line; good enough.)
        prefix = line[:matched.start()]
        if prefix.endswith('std::') or not prefix.endswith('::'):
          required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc. Here, we will look at possible includes.
  # Let's flatten the include_state include_list and copy it into a dictionary.
  include_dict = dict([item for sublist in include_state.include_list
                       for item in sublist])

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # Use the absolute path so that matching works properly.
  abs_filename = FileInfo(filename).FullName()

  # For Emacs's flymake.
  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
  # by flymake and that file name might end with '_flymake.cc'. In that case,
  # restore original file name here so that the corresponding header file can be
  # found.
  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
  # instead of 'foo_flymake.h'
  abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)

  # UpdateIncludeState inserts into include_dict while we loop, so iterate
  # over a snapshot of the keys.  list() is required for a real copy: on
  # Python 3 dict.keys() is a live view and would raise RuntimeError when
  # the dictionary grows during iteration.
  for header in list(include_dict):
    (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
    fullpath = common_path + header
    if same_module and UpdateIncludeState(fullpath, include_dict, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look. In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for required_header_unstripped in required:
    template = required[required_header_unstripped][1]
    if required_header_unstripped.strip('<>"') not in include_dict:
      error(filename, required[required_header_unstripped][0],
            'build/include_what_you_use', 4,
            'Add #include ' + required_header_unstripped + ' for ' + template)
5596
5597
# Matches make_pair followed by explicit template arguments.
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Reports make_pair calls that spell out their template arguments.

  G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  if not _RE_PATTERN_EXPLICIT_MAKEPAIR.search(clean_lines.elided[linenum]):
    return

  message = ('For C++11-compatibility, omit template arguments from make_pair'
             ' OR use pair directly OR if appropriate, construct a pair directly')
  # Confidence 4 = high confidence.
  error(filename, linenum, 'build/explicit_make_pair', 4, message)
5620
5621
def CheckRedundantVirtual(filename, clean_lines, linenum, error):
  """Check if line contains a redundant "virtual" function-specifier.

  A "virtual" is reported as redundant when "override" or "final" is found
  shortly after the end of the parameter list of the declaration that
  follows the keyword.  The error is attached to the line holding "virtual".

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Look for "virtual" on current line.
  line = clean_lines.elided[linenum]
  virtual = Match(r'^(.*)(\bvirtual\b)(.*)$', line)
  if not virtual: return

  # Ignore "virtual" keywords that are near access-specifiers.  These
  # are only used in class base-specifier and do not apply to member
  # functions.
  if (Search(r'\b(public|protected|private)\s+$', virtual.group(1)) or
      Match(r'^\s+(public|protected|private)\b', virtual.group(3))):
    return

  # Ignore the "virtual" keyword from virtual base classes.  Usually
  # there is a colon on the same line in these cases (virtual base
  # classes are rare in google3 because multiple inheritance is rare).
  if Match(r'^.*[^:]:[^:].*$', line): return

  # Look for the next opening parenthesis.  This is the start of the
  # parameter list (possibly on the next line shortly after virtual).
  # TODO(unknown): doesn't work if there are virtual functions with
  # decltype() or other things that use parentheses, but csearch suggests
  # that this is rare.
  end_col = -1
  end_line = -1
  # NOTE(review): group(2) is always the text "virtual", so this is the
  # length of the keyword rather than its position in the line -- confirm
  # that skipping a fixed number of leading characters is intended.
  start_col = len(virtual.group(2))
  # Scan the current line and at most the two following ones.
  for start_line in xrange(linenum, min(linenum + 3, clean_lines.NumLines())):
    line = clean_lines.elided[start_line][start_col:]
    parameter_list = Match(r'^([^(]*)\(', line)
    if parameter_list:
      # Match parentheses to find the end of the parameter list
      (_, end_line, end_col) = CloseExpression(
          clean_lines, start_line, start_col + len(parameter_list.group(1)))
      break
    # Continuation lines are scanned from their first column.
    start_col = 0

  if end_col < 0:
    return  # Couldn't find end of parameter list, give up

  # Look for "override" or "final" after the parameter list
  # (possibly on the next few lines).
  for i in xrange(end_line, min(end_line + 3, clean_lines.NumLines())):
    line = clean_lines.elided[i][end_col:]
    match = Search(r'\b(override|final)\b', line)
    if match:
      error(filename, linenum, 'readability/inheritance', 4,
            ('"virtual" is redundant since function is '
             'already declared as "%s"' % match.group(1)))

    # Set end_col to check whole lines after we are done with the
    # first line.
    end_col = 0
    # Stop scanning once the examined text ends with a non-word character
    # (optionally followed by whitespace).
    if Search(r'[^\w]\s*$', line):
      break
5684
5685
def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error):
  """Reports "override" used together with "final" on one declaration.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # A closing parenthesis nearby is needed to tell where the declarator
  # ends and where the virt-specifiers start; without one we would risk
  # false positives.
  line = clean_lines.elided[linenum]
  last_paren = line.rfind(')')
  if last_paren >= 0:
    # Only the text from the last ')' on this line is examined.
    virt_specifiers = line[last_paren:]
  elif linenum > 1 and ')' in clean_lines.elided[linenum - 1]:
    # The declarator closed on the previous line: examine the whole line.
    virt_specifiers = line
  else:
    return

  # At most one of "override" or "final" should be present, not both.
  has_override = Search(r'\boverride\b', virt_specifiers)
  if has_override and Search(r'\bfinal\b', virt_specifiers):
    error(filename, linenum, 'readability/inheritance', 4,
          ('"override" is redundant since function is '
           'already declared as "final"'))
5713
5714
5715
5716
5717# Returns true if we are at a new block, and it is directly
5718# inside of a namespace.
def IsBlockInNameSpace(nesting_state, is_forward_declaration):
  """Checks that the new block is directly in a namespace.

  Args:
    nesting_state: The _NestingState object that contains info about our state.
    is_forward_declaration: If the class is a forward declared class.
  Returns:
    Whether or not the new block is directly in a namespace.
  """
  stack = nesting_state.stack

  # A forward declaration does not open a block of its own, so the
  # namespace has to be the innermost stack entry.
  if is_forward_declaration:
    return len(stack) >= 1 and isinstance(stack[-1], _NamespaceInfo)

  # Otherwise the new block is the innermost entry and the namespace must
  # be the entry directly beneath it.
  return (len(stack) > 1 and
          stack[-1].check_namespace_indentation and
          isinstance(stack[-2], _NamespaceInfo))
5738
5739
def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item,
                                    raw_lines_no_comments, linenum):
  """Decides whether the namespace indentation check applies to this line.

  Args:
    nesting_state: The current nesting state.
    is_namespace_indent_item: If we just put a new class on the stack, True.
      If the top of the stack is not a class, or we did not recently
      add the class, False.
    raw_lines_no_comments: The lines without the comments.
    linenum: The current line number we are processing.

  Returns:
    True if we should apply our namespace indentation check. Currently, it
    only works for classes and namespaces inside of a namespace.
  """
  forward_declared = IsForwardClassDeclaration(raw_lines_no_comments, linenum)

  # Only freshly opened items and forward declarations are candidates.
  if not is_namespace_indent_item and not forward_declared:
    return False

  # Inside a macro definition the namespace indentation is not checked.
  if IsMacroDefinition(raw_lines_no_comments, linenum):
    return False

  return IsBlockInNameSpace(nesting_state, forward_declared)
5768
5769
5770# Call this method if the line is directly inside of a namespace.
5771# If the line above is blank (excluding comments) or the start of
5772# an inner namespace, it cannot be indented.
def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum,
                                    error):
  """Reports an error if the given line starts with whitespace.

  Args:
    filename: The name of the current file.
    raw_lines_no_comments: The lines without the comments.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  if not Match(r'^\s+', raw_lines_no_comments[linenum]):
    return
  error(filename, linenum, 'runtime/indentation_namespace', 4,
        'Do not indent within a namespace')
5779
5780
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=[]):
  """Processes a single line in the file.

  Updates the NOLINT suppressions and the nesting state for the line, then
  runs the per-line checks in a fixed order.  Lines inside an asm block
  only get the namespace indentation check.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: The cleansed lines of the file; its raw_lines attribute is
                 used for NOLINT parsing, and the object is handed to every
                 check.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported.  Checks in this file
           call it as error(filename, linenum, category, confidence,
           message).
    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error
  """
  # NOTE: the mutable default for extra_check_functions is only iterated
  # here, never modified.
  raw_lines = clean_lines.raw_lines
  ParseNolintSuppressions(filename, raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)
  CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
                               error)
  # None of the remaining checks are run inside an asm block.
  if nesting_state.InAsmBlock(): return
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                nesting_state, error)
  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)
  CheckVlogArguments(filename, clean_lines, line, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
  CheckRedundantVirtual(filename, clean_lines, line, error)
  CheckRedundantOverrideOrFinal(filename, clean_lines, line, error)
  # Caller-supplied checks run last, after all built-in checks.
  for check_fn in extra_check_functions:
    check_fn(filename, clean_lines, line, error)
5824
def FlagCxx11Features(filename, clean_lines, linenum, error):
  """Flag those c++11 features that we only allow in certain places.

  Reports includes of TR1 headers and of unapproved C++11 headers, as well
  as uses of a few unapproved std:: names.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line)
  if include:
    header = include.group(1)

    # Flag unapproved C++ TR1 headers.
    if header.startswith('tr1/'):
      error(filename, linenum, 'build/c++tr1', 5,
            ('C++ TR1 headers such as <%s> are unapproved.') % header)

    # Flag unapproved C++11 headers.
    unapproved_headers = ('cfenv',
                          'condition_variable',
                          'fenv.h',
                          'future',
                          'mutex',
                          'thread',
                          'chrono',
                          'ratio',
                          'regex',
                          'system_error',
                         )
    if header in unapproved_headers:
      error(filename, linenum, 'build/c++11', 5,
            ('<%s> is an unapproved C++11 header.') % header)

  # The only place where we need to worry about C++11 keywords and library
  # features in preprocessor directives is in macro definitions.
  is_directive = Match(r'\s*#', line)
  if is_directive and not Match(r'\s*#\s*define\b', line):
    return

  # These are classes and free functions.  The classes are always
  # mentioned as std::*, but we only catch the free functions if
  # they're not found by ADL.  They're alphabetical by header.
  unapproved_names = (
      # type_traits
      'alignment_of',
      'aligned_union',
      )
  for top_name in unapproved_names:
    if not Search(r'\bstd::%s\b' % top_name, line):
      continue
    error(filename, linenum, 'build/c++11', 5,
          ('std::%s is an unapproved C++11 class or function.  Send c-style '
           'an example of where it would make your code more readable, and '
           'they may let you use it.') % top_name)
5875
5876
def FlagCxx14Features(filename, clean_lines, linenum, error):
  """Flag those C++14 features that we restrict.

  Currently this only reports includes of unapproved C++14 headers.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]',
                  clean_lines.elided[linenum])
  if not include:
    return

  # Flag unapproved C++14 headers.
  header = include.group(1)
  if header in ('scoped_allocator', 'shared_mutex'):
    error(filename, linenum, 'build/c++14', 5,
          ('<%s> is an unapproved C++14 header.') % header)
5894
5895
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=[]):
  """Performs lint checks and reports any errors to the given error function.

  Runs the whole-file checks, the per-line checks (ProcessLine and
  FlagCxx11Features) for every cleansed line, and finally the checks that
  need the complete file (include-what-you-use, header inclusion, bad
  characters, newline at EOF).

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported.  Checks in this file
           call it as error(filename, linenum, category, confidence,
           message).
    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error
  """
  # Builds a new list (the caller's list is not modified) with a marker
  # comment at index 0, so that list indices equal 1-based line numbers,
  # and a second marker after the last line.
  lines = (['// marker so line numbers and indices both start at 1'] + lines +
           ['// marker so line numbers end in a known way'])

  # Fresh per-file state.
  include_state = _IncludeState()
  function_state = _FunctionState()
  nesting_state = NestingState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)
  ProcessGlobalSuppresions(lines)
  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)

  if IsHeaderExtension(file_extension):
    CheckForHeaderGuard(filename, clean_lines, error)

  # Per-line checks.
  for line in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions)
    FlagCxx11Features(filename, clean_lines, line, error)
  nesting_state.CheckCompletedBlocks(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # Check that the .cc file has included its header if it exists.
  if _IsSourceExtension(file_extension):
    CheckHeaderFileIncluded(filename, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForBadCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
5946
def ProcessConfigOverrides(filename):
  """Loads the configuration files and processes the config overrides.

  Walks from the directory of |filename| up to the root directory and reads
  every CPPLINT.cfg found on the way.  The walk stops early at a config
  file containing "set noparent" or at one that cannot be read.

  Recognized options are "set noparent", "filter", "exclude_files",
  "linelength", "root" and "headers"; anything else is reported on stderr.
  The "linelength" and "root" options update the module-level _line_length
  and _root settings.

  Args:
    filename: The name of the file being processed by the linter.

  Returns:
    False if the current |filename| should not be processed further.
  """
  global _line_length
  global _root

  abs_filename = os.path.abspath(filename)
  cfg_filters = []
  keep_looking = True
  while keep_looking:
    abs_path, base_name = os.path.split(abs_filename)
    if not base_name:
      break  # Reached the root directory.

    cfg_file = os.path.join(abs_path, "CPPLINT.cfg")
    # The next iteration looks one directory further up.
    abs_filename = abs_path
    if not os.path.isfile(cfg_file):
      continue

    try:
      with open(cfg_file) as file_handle:
        for line in file_handle:
          line, _, _ = line.partition('#')  # Remove comments.
          if not line.strip():
            continue

          name, _, val = line.partition('=')
          name = name.strip()
          val = val.strip()
          if name == 'set noparent':
            keep_looking = False
          elif name == 'filter':
            cfg_filters.append(val)
          elif name == 'exclude_files':
            # When matching exclude_files pattern, use the base_name of
            # the current file name or the directory name we are processing.
            # For example, if we are checking for lint errors in /foo/bar/baz.cc
            # and we found the .cfg file at /foo/CPPLINT.cfg, then the config
            # file's "exclude_files" filter is meant to be checked against "bar"
            # and not "baz" nor "bar/baz.cc".
            # (base_name is never empty here: the loop exits above when it is.)
            if re.compile(val).match(base_name):
              # The "Ignoring file" warning is suppressed when using --quiet.
              if not _cpplint_state.quiet:
                sys.stderr.write('Ignoring "%s": file excluded by "%s". '
                                 'File path component "%s" matches '
                                 'pattern "%s"\n' %
                                 (filename, cfg_file, base_name, val))
              return False
          elif name == 'linelength':
            try:
              _line_length = int(val)
            except ValueError:
              # Terminate the message with a newline, like every other
              # diagnostic here, so later output is not glued onto it.
              sys.stderr.write('Line length must be numeric.\n')
          elif name == 'root':
            # root directories are specified relative to CPPLINT.cfg dir.
            _root = os.path.join(os.path.dirname(cfg_file), val)
          elif name == 'headers':
            ProcessHppHeadersOption(val)
          else:
            sys.stderr.write(
                'Invalid configuration option (%s) in file %s\n' %
                (name, cfg_file))

    except IOError:
      sys.stderr.write(
          "Skipping config file '%s': Can't open for reading\n" % cfg_file)
      keep_looking = False

  # Apply all the accumulated filters in reverse order (top-level directory
  # config options having the least priority).
  for cfg_filter in reversed(cfg_filters):
    _AddFilters(cfg_filter)

  return True
6030
6031
def ProcessFile(filename, vlevel, extra_check_functions=None):
  """Does google-lint on a single file.

  The filter state is backed up on entry and restored on every exit path,
  so config overrides applied for this file do not carry over to the caller.

  Args:
    filename: The name of the file to parse, or '-' to read from stdin.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.

    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error.
                           Defaults to no extra checks.
  """
  # Use a fresh list per call rather than a shared mutable default argument.
  if extra_check_functions is None:
    extra_check_functions = []

  _SetVerboseLevel(vlevel)
  _BackupFilters()
  old_errors = _cpplint_state.error_count

  if not ProcessConfigOverrides(filename):
    _RestoreFilters()
    return

  lf_lines = []
  crlf_lines = []
  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below.
    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      # The context manager closes the file handle as soon as the contents
      # have been read, even if read() raises.
      with codecs.open(filename, 'r', 'utf8', 'replace') as target_file:
        lines = target_file.read().split('\n')

    # Remove trailing '\r'.
    # The -1 accounts for the extra trailing blank line we get from split()
    for linenum in range(len(lines) - 1):
      if lines[linenum].endswith('\r'):
        lines[linenum] = lines[linenum].rstrip('\r')
        # Recorded line numbers are 1-based.
        crlf_lines.append(linenum + 1)
      else:
        lf_lines.append(linenum + 1)

  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    _RestoreFilters()
    return

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if filename != '-' and file_extension not in _valid_extensions:
    sys.stderr.write('Ignoring %s; not a valid file name '
                     '(%s)\n' % (filename, ', '.join(_valid_extensions)))
  else:
    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions)

    # If end-of-line sequences are a mix of LF and CR-LF, issue
    # warnings on the lines with CR.
    #
    # Don't issue any warnings if all lines are uniformly LF or CR-LF,
    # since critique can handle these just fine, and the style guide
    # doesn't dictate a particular end of line sequence.
    #
    # We can't depend on os.linesep to determine what the desired
    # end-of-line sequence should be, since that will return the
    # server-side end-of-line sequence.
    if lf_lines and crlf_lines:
      # Warn on every line with CR.  An alternative approach might be to
      # check whether the file is mostly CRLF or just LF, and warn on the
      # minority, we bias toward LF here since most tools prefer LF.
      for linenum in crlf_lines:
        Error(filename, linenum, 'whitespace/newline', 1,
              'Unexpected \\r (^M) found; better to use only \\n')

  # Suppress printing anything if --quiet was passed unless the error
  # count has increased after processing this file.
  if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count:
    sys.stdout.write('Done processing %s\n' % filename)
  _RestoreFilters()
6122
6123
def PrintUsage(message):
  """Writes the usage text to stderr, then terminates the program.

  Args:
    message: The optional error message.  When it is truthy the process
      exits through sys.exit with a 'FATAL ERROR' string built from it;
      otherwise the process exits with status 1.
  """
  sys.stderr.write(_USAGE)
  # sys.exit accepts either an integer status or a message string.
  exit_argument = ('\nFATAL ERROR: ' + message) if message else 1
  sys.exit(exit_argument)
6135
6136
def PrintCategories():
  """Writes every known error category to stderr and exits with status 0.

  These are the categories used to filter messages via --filter.  Each
  category is written on its own line, indented by two spaces.
  """
  listing = ''.join(['  %s\n' % category for category in _ERROR_CATEGORIES])
  sys.stderr.write(listing)
  sys.exit(0)
6144
6145
def ParseArguments(args):
  """Parses the command line arguments.

  Besides returning the file list, this applies the parsed options as
  side-effects: it sets the output format, quiet flag, verbosity level,
  filters and counting style through the _Set* helpers, may rebind the
  module-level _root, _line_length and _valid_extensions, and forwards
  --headers to ProcessHppHeadersOption.

  The process exits through PrintUsage on --help, on unrecognised options,
  on an invalid option value, or when no filenames are given, and through
  PrintCategories when --filter is passed an empty value.

  Args:
    args: The command line arguments (without the program name).

  Returns:
    The list of filenames to lint.
  """
  # Declared once up front; the option loop below may rebind any of these.
  global _root
  global _line_length
  global _valid_extensions

  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter=',
                                                 'root=',
                                                 'linelength=',
                                                 'extensions=',
                                                 'headers=',
                                                 'quiet'])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  # Start from the current state so options that are not given keep their
  # existing values.
  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  quiet = _Quiet()
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7', 'eclipse'):
        PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
      output_format = val
    elif opt == '--quiet':
      quiet = True
    elif opt == '--verbose':
      # Report a non-integer value as a usage error, like --linelength does,
      # instead of letting the ValueError escape as a traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('Verbosity must be an integer.')
    elif opt == '--filter':
      filters = val
      # An empty --filter= is a request to list the available categories.
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val
    elif opt == '--root':
      _root = val
    elif opt == '--linelength':
      try:
        _line_length = int(val)
      except ValueError:
        PrintUsage('Line length must be digits.')
    elif opt == '--extensions':
      # str.split never raises ValueError, so no error handling is needed.
      _valid_extensions = set(val.split(','))
    elif opt == '--headers':
      ProcessHppHeadersOption(val)

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetQuiet(quiet)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames
6222
6223
def main():
  """Command-line entry point: lints every file named in sys.argv[1:].

  Exits through sys.exit with a true value when at least one error was
  counted and a false value otherwise.
  """
  targets = ParseArguments(sys.argv[1:])

  # Wrap stderr so that it writes with replacement characters; this way we
  # don't die if we try to print something containing non-ASCII characters.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCounts()
  for target in targets:
    ProcessFile(target, _cpplint_state.verbose_level)

  # With --quiet the error count summary is only printed when there is at
  # least one error to report.
  found_errors = _cpplint_state.error_count > 0
  if found_errors or not _cpplint_state.quiet:
    _cpplint_state.PrintErrorCounts()

  sys.exit(_cpplint_state.error_count > 0)
6242
6243
# Run the linter only when this file is executed as a script; importing it
# as a module does not call main().
if __name__ == '__main__':
  main()
6246