1# python3
2# Copyright (C) 2019 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Grep warnings messages and output HTML tables or warning counts in CSV.
17
18Default is to output warnings in HTML tables grouped by warning severity.
19Use option --byproject to output tables grouped by source file projects.
20Use option --gencsv to output warning counts in CSV format.
21
22Default input file is build.log, which can be changed with the --log flag.
23"""
24
25# List of important data structures and functions in this script.
26#
27# To parse and keep warning message in the input file:
28#   severity:                classification of message severity
29#   warn_patterns:
30#   warn_patterns[w]['category']     tool that issued the warning, not used now
31#   warn_patterns[w]['description']  table heading
32#   warn_patterns[w]['members']      matched warnings from input
33#   warn_patterns[w]['patterns']     regular expressions to match warnings
34#   warn_patterns[w]['projects'][p]  number of warnings of pattern w in p
35#   warn_patterns[w]['severity']     severity tuple
36#   project_list[p][0]               project name
37#   project_list[p][1]               regular expression to match a project path
38#   project_patterns[p]              re.compile(project_list[p][1])
39#   project_names[p]                 project_list[p][0]
40#   warning_messages     array of each warning message, without source url
41#   warning_links        array of each warning code search link; for 'chrome'
42#   warning_records      array of [idx to warn_patterns,
43#                                  idx to project_names,
44#                                  idx to warning_messages,
45#                                  idx to warning_links]
46#   parse_input_file
47#
48import argparse
49import io
50import multiprocessing
51import os
52import re
53import sys
54
55# pylint:disable=relative-beyond-top-level
56# pylint:disable=g-importing-member
57from . import android_project_list
58from . import chrome_project_list
59from . import cpp_warn_patterns as cpp_patterns
60from . import html_writer
61from . import java_warn_patterns as java_patterns
62from . import make_warn_patterns as make_patterns
63from . import other_warn_patterns as other_patterns
64from . import tidy_warn_patterns as tidy_patterns
65
66
def parse_args(use_google3):
  """Build the command-line parser and return the parsed flags.

  Args:
    use_google3: when true, the check that the log file exists on the local
      file system is skipped.

  Returns:
    The argparse namespace. flags.log always holds the log path: the value of
    --log when given, otherwise the positional build.log argument.
  """
  arg_parser = argparse.ArgumentParser(
      description=__doc__,
      formatter_class=argparse.RawDescriptionHelpFormatter)
  add_flag = arg_parser.add_argument

  add_flag('--capacitor_path', default='',
           help='Save capacitor warning file to the passed absolute'
           ' path')
  # The name csvpath does not follow the *_path style used above because the
  # original Android script used csvpath and other scripts still rely on it.
  add_flag('--csvpath', default='',
           help='Save CSV warning file to the passed path')
  add_flag('--gencsv', action='store_true',
           help='Generate CSV file with number of various warnings')
  add_flag('--byproject', action='store_true',
           help='Separate warnings in HTML output by project names')
  add_flag('--url', default='',
           help='Root URL of an Android source code tree prefixed '
           'before files in warnings')
  add_flag('--separator', default='?l=',
           help='Separator between the end of a URL and the line '
           'number argument. e.g. #')
  add_flag('--processes', default=multiprocessing.cpu_count(), type=int,
           help='Number of parallel processes to process warnings')
  # Old Android build scripts call warn.py without --platform,
  # so 'android' has to stay the default platform.
  add_flag('--platform', default='android', choices=['chrome', 'android'],
           help='Platform of the build log')
  # Old Android build scripts call warn.py with only a build.log file path,
  # hence the optional positional argument next to --log.
  add_flag('--log', help='Path to build log file')
  add_flag(dest='buildlog', metavar='build.log', default='build.log',
           nargs='?', help='Path to build.log file')

  flags = arg_parser.parse_args()
  # --log wins; the positional argument is only the fallback.
  flags.log = flags.log or flags.buildlog
  if not (use_google3 or os.path.exists(flags.log)):
    sys.exit('Cannot find log file: ' + flags.log)
  return flags
107
108
def get_project_names(project_list):
  """Return the first element (the project name) of each project_list entry.

  The order of project_list is preserved.
  """
  return [entry[0] for entry in project_list]
112
113
def find_project_index(line, project_patterns):
  """Return the index of the first project pattern matching line.

  Args:
    line: text to test; patterns are applied with match(), i.e. anchored at
      the start of the text.
    project_patterns: sequence of compiled regular expressions.

  Returns:
    Index into project_patterns of the first match, or -1 if nothing matches.
  """
  matching_indices = (idx for idx, pattern in enumerate(project_patterns)
                      if pattern.match(line))
  return next(matching_indices, -1)
119
120
def classify_one_warning(warning, link, results, project_patterns,
                         warn_patterns):
  """Append a classification record for warning to results if it matches.

  Args:
    warning: the warning line to classify.
    link: the code search link that belongs to the warning.
    results: list that receives [warning, link, pattern index, project index]
      for the first warn pattern that matches.
    project_patterns: compiled project regular expressions.
    warn_patterns: list of dicts, each with a 'compiled_patterns' list.
  """
  for pattern_idx, warn_pattern in enumerate(warn_patterns):
    if any(cpat.match(warning)
           for cpat in warn_pattern['compiled_patterns']):
      project_idx = find_project_index(warning, project_patterns)
      results.append([warning, link, pattern_idx, project_idx])
      return
  # Nothing matched, so nothing is recorded. This points to a problem parsing
  # the log, probably caused by 'make -j' mixing the output of 2 or more
  # concurrent compiles.
135
136
def remove_prefix(s, sub):
  """Drop the part of s that precedes the last occurrence of sub.

  The result starts with sub itself. s is returned unchanged when sub does
  not occur in it.
  """
  start = s.rfind(sub)
  return s if start < 0 else s[start:]
143
144
145# TODO(emmavukelj): Don't have any generate_*_cs_link functions call
146# normalize_path a second time (the first time being in parse_input_file)
def generate_cs_link(warning_line, flags, android_root=None):
  """Return the code search link of warning_line for flags.platform.

  android_root is only used for the 'android' platform. Any platform other
  than 'chrome' and 'android' gets the bare corp code search URL.
  """
  platform = flags.platform
  if platform == 'android':
    return generate_android_cs_link(warning_line, flags, android_root)
  if platform == 'chrome':
    return generate_chrome_cs_link(warning_line, flags)
  return 'https://cs.corp.google.com/'
153
154
def generate_android_cs_link(warning_line, flags, android_root):
  """Build the code search link for an Android warning line.

  Args:
    warning_line: 'path:line_or_text:rest'; it must contain at least two
      colons, otherwise the three-way unpacking below raises ValueError.
    flags: parsed flags; url, separator and platform are read.
    android_root: source tree root handed to normalize_path.

  Returns:
    The normalized path alone when flags.url is empty. Otherwise
    flags.url + '/' + normalized path, followed by flags.separator and the
    line number when the second field consists of digits only.
  """
  # A maximum of two splits yields exactly three fields.
  raw_path, line_number_str, _ = warning_line.split(':', 2)
  normalized_path = normalize_path(raw_path, flags, android_root)
  if not flags.url:
    return normalized_path
  line_suffix = ''
  if line_number_str.isdigit():
    line_suffix = flags.separator + line_number_str
  return flags.url + '/' + normalized_path + line_suffix
166
167
def generate_chrome_cs_link(warning_line, flags):
  """Generate the code search link for a warning line in Chrome.

  Args:
    warning_line: warning text split on ':'; the first field is used as the
      file path and the second as the line number.
    flags: parsed flags, forwarded to normalize_path.

  Returns:
    Either a direct link '<base><repo><path>?l=<line>' when one of the known
    directory cases below applies, or a file query '<base>?q=file:<path>' for
    '/gen/' paths and for paths no case recognizes.

  Raises:
    ValueError or IndexError: from int(split_line[1]) when a direct link is
      built and the second ':'-separated field is missing or not a number.
  """
  split_line = warning_line.split(':')
  raw_path = split_line[0]
  normalized_path = normalize_path(raw_path, flags)
  link_base = 'https://cs.chromium.org/'
  link_add = 'chromium'
  link_path = None

  # Basically just going through a few specific directory cases and specifying
  # the proper behavior for that case. This list of cases was accumulated
  # through trial and error manually going through the warnings.
  #
  # This code pattern of using case-specific "if"s instead of "elif"s looks
  # possibly accidental and mistaken but it is intentional because some paths
  # fall under several cases (e.g. third_party/lib/nghttp2_frame.c) and for
  # those we want the most specific case to be applied. If there is reliable
  # knowledge of exactly where these occur, this could be changed to "elif"s
  # but there is no reliable set of paths falling under multiple cases at the
  # moment.
  #
  # Because the cases are not exclusive, a later matching case overwrites the
  # link_path chosen by an earlier one.
  if '/src/third_party' in raw_path:
    # remove_prefix keeps the searched substring at the start of its result.
    link_path = remove_prefix(raw_path, '/src/third_party/')
  if '/chrome_root/src_internal/' in raw_path:
    link_path = remove_prefix(raw_path, '/chrome_root/src_internal/')
    link_path = link_path[len('/chrome_root'):]  # remove chrome_root
  if '/chrome_root/src/' in raw_path:
    link_path = remove_prefix(raw_path, '/chrome_root/src/')
    link_path = link_path[len('/chrome_root'):]  # remove chrome_root
  if '/libassistant/' in raw_path:
    link_add = 'eureka_internal/chromium/src'
    link_base = 'https://cs.corp.google.com/'  # internal data
    link_path = remove_prefix(normalized_path, '/libassistant/')
  if raw_path.startswith('gen/'):
    link_path = '/src/out/Debug/gen/' + normalized_path
  if '/gen/' in raw_path:
    # Paths with a '/gen/' component always become a file query, using
    # whatever link_base the cases above left in place.
    return '%s?q=file:%s' % (link_base, remove_prefix(normalized_path, '/gen/'))

  # Fallback for relative paths that already start at src/ or src_internal/.
  if not link_path and (raw_path.startswith('src/') or
                        raw_path.startswith('src_internal/')):
    link_path = '/%s' % raw_path

  if not link_path:  # can't find specific link, send a query
    return '%s?q=file:%s' % (link_base, normalized_path)

  # Only direct links carry a line number; it is taken from the second field.
  line_number = int(split_line[1])
  link = '%s%s%s?l=%d' % (link_base, link_add, link_path, line_number)
  return link
215
216
def find_warn_py_and_android_root(path):
  """Search the ancestors of path for a directory that contains warn.py.

  Args:
    path: '/'-separated file path.

  Returns:
    The longest leading part of path (at least two '/'-separated components,
    never the full path) under which build/make/tools/warn.py exists, or ''
    when there is none.
  """
  components = path.split('/')
  # Walk from the deepest ancestor up towards the top of the path.
  for end in range(len(components) - 1, 1, -1):
    candidate = '/'.join(components[:end])
    # Android root directory should contain this script.
    if os.path.exists(candidate + '/build/make/tools/warn.py'):
      return candidate
  return ''
226
227
def find_android_root(buildlog):
  """Guess the android source root from the warning lines of a build log.

  Args:
    buildlog: iterable of log lines.

  Returns:
    The root found through warn.py for one of the first 99 warnings with an
    absolute path; otherwise the longest common prefix of the first 10000
    such warning lines, provided there are more than 10 of them and the
    prefix ends in '/' (returned without that slash); otherwise ''.
  """
  abs_path_warning = re.compile('^/[^ ]*/[^ ]*: warning: .*')
  matched_lines = []
  for line in buildlog:
    if not abs_path_warning.match(line):
      continue
    matched_lines.append(line)
    if len(matched_lines) > 9999:
      break
    # Try to find warn.py and use its location to find the source tree root,
    # but only for the first few warnings.
    if len(matched_lines) < 100:
      path = os.path.normpath(re.sub(':.*$', '', line))
      root_from_script = find_warn_py_and_android_root(path)
      if root_from_script:
        return root_from_script
  # Do not use common prefix of a small number of paths.
  if len(matched_lines) > 10:
    # pytype: disable=wrong-arg-types
    common = os.path.commonprefix(matched_lines)
    # pytype: enable=wrong-arg-types
    if len(common) > 2 and common.endswith('/'):
      return common[:-1]
  return ''
256
257
def remove_android_root_prefix(path, android_root):
  """Return path relative to android_root when path lies under that root.

  The root only counts as a prefix on a path-component boundary, so root
  '/a/b' is stripped from '/a/b/c.c' but not from '/a/bc/d.c'. Trailing
  slashes on android_root are ignored.

  Args:
    path: file path to shorten.
    android_root: root directory of the source tree; may be empty.

  Returns:
    The part of path after android_root and its separator, '' when path is
    the root itself, or path unchanged when it is not under android_root or
    android_root is empty.
  """
  if not android_root:
    return path
  root = android_root.rstrip('/')
  if path == root:
    return ''
  root_with_sep = root + '/'
  if path.startswith(root_with_sep):
    return path[len(root_with_sep):]
  return path
263
264
def normalize_path(path, flags, android_root=None):
  """Normalize a file path and make it relative to the source tree root.

  Args:
    path: file path as found in the log.
    flags: parsed flags; only flags.platform is read.
    android_root: Android source root; used for the 'android' platform only.

  Returns:
    For 'android': the os.path.normpath() result, with android_root removed
    when a root is given. For any other platform: the part of the normalized
    path after the first 'chrome_root/', or the whole normalized path when
    that marker is absent.
  """
  clean_path = os.path.normpath(path)

  if flags.platform != 'android':
    # Remove known prefix of root path; we want the path relative to it.
    _, marker, relative = clean_path.partition('chrome_root/')
    return relative if marker else clean_path

  if android_root:
    return remove_android_root_prefix(clean_path, android_root)
  return clean_path
281
282
def normalize_warning_line(line, flags, android_root=None):
  """Return line as stripped ASCII text with its leading file path normalized.

  Everything before the first ':' is treated as the file path and passed
  through normalize_path; the remainder of the line is kept as is.
  """
  # Curly single quotes become plain apostrophes.
  ascii_line = re.sub(u'[\u2018\u2019]', '\'', line)
  # Every other non-ASCII character becomes a space.
  ascii_line = re.sub(u'[^\x00-\x7f]', ' ', ascii_line).strip()
  first_colon = ascii_line.find(':')
  path_part = ascii_line[:first_colon]
  remainder = ascii_line[first_colon:]
  return normalize_path(path_part, flags, android_root) + remainder
292
293
def parse_input_file_chrome(infile, flags):
  """Collect the unique warnings and the header fields of a Chrome build log.

  Args:
    infile: iterable of log lines.
    flags: parsed flags, forwarded to the normalization and link helpers.

  Returns:
    (unique_warnings, header_str): a dict mapping each normalized warning line
    to its code search link, and 'version - board - architecture', in which
    fields that were not found in the log read 'unknown'.
  """
  # Only handle warning lines of format
  # 'file_path:line_no:col_no: warning: ...'.
  warning_re = re.compile(r'^[^ ]*/[^ ]*:[0-9]+:[0-9]+: warning: .*')

  platform_version = board_name = architecture = 'unknown'

  # Keep every distinct warning once; skipping duplicates saves about 8% of
  # the parsing time on a typical build log.
  links_by_warning = {}
  for line in infile:
    if warning_re.match(line):
      key = normalize_warning_line(line, flags)
      if key not in links_by_warning:
        links_by_warning[key] = generate_cs_link(line, flags)
      continue

    # Header lines are only searched for while a field is still missing.
    if 'unknown' not in (platform_version, board_name, architecture):
      continue

    if re.match(r'.+Package:.+chromeos-base/chromeos-chrome-', line):
      platform_version = 'R' + line.split('chrome-')[1].split('_')[0]
      continue

    unpacked = re.match(r'.+Source\sunpacked\sin\s(.+)', line)
    if unpacked:
      board_name = unpacked.group(1).split('/')[2]
      continue

    use_flags = re.match(r'.+USE:\s*([^\s]*).*', line)
    if use_flags:
      architecture = use_flags.group(1)

  header_str = '%s - %s - %s' % (platform_version, board_name, architecture)
  return links_by_warning, header_str
331
332
def add_normalized_line_to_warnings(line, flags, android_root, unique_warnings):
  """Record line in unique_warnings under its normalized form.

  The code search link is only generated the first time a normalized line is
  seen. unique_warnings is updated in place and also returned.
  """
  key = normalize_warning_line(line, flags, android_root)
  if key in unique_warnings:
    return unique_warnings
  unique_warnings[key] = generate_cs_link(line, flags, android_root)
  return unique_warnings
340
341
def parse_input_file_android(infile, flags):
  """Parse Android input file, collect parameters and warning lines.

  Args:
    infile: seekable stream of log lines. It is read twice: once to guess the
      source root and once to collect the warnings.
    flags: parsed flags, forwarded to the normalization and link helpers.

  Returns:
    (unique_warnings, header_str): a dict mapping each normalized warning line
    to its code search link, and 'version - product - variant', in which
    fields that were not found in the log read 'unknown'.
  """
  platform_version = 'unknown'
  target_product = 'unknown'
  target_variant = 'unknown'
  android_root = find_android_root(infile)
  infile.seek(0)

  # rustc warning messages have two lines that should be combined:
  #     warning: description
  #        --> file_path:line_number:column_number
  # Some warning messages have no file name:
  #     warning: macro replacement list ... [bugprone-macro-parentheses]
  # Some makefile warning messages have no line number:
  #     some/path/file.mk: warning: description
  # C/C++ compiler warning messages have line and column numbers:
  #     some/path/file.c:line_number:column_number: warning: description
  warning_pattern = re.compile('(^[^ ]*/[^ ]*: warning: .*)|(^warning: .*)')
  warning_without_file = re.compile('^warning: .*')
  rustc_file_position = re.compile('^[ ]+--> [^ ]*/[^ ]*:[0-9]+:[0-9]+')

  # Collect all unique warning lines.
  # Skipping the duplicated warnings saves about 8% of the parsing time on a
  # typical build log.
  unique_warnings = dict()
  line_counter = 0
  prev_warning = ''
  for line in infile:
    if prev_warning:
      if rustc_file_position.match(line):
        # must be a rustc warning, combine 2 lines into one warning
        line = line.strip().replace('--> ', '') + ': ' + prev_warning
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
        prev_warning = ''
        continue
      # add prev_warning, and then process the current line
      prev_warning = 'unknown_source_file: ' + prev_warning
      unique_warnings = add_normalized_line_to_warnings(
          prev_warning, flags, android_root, unique_warnings)
      prev_warning = ''

    if warning_pattern.match(line):
      if warning_without_file.match(line):
        # save this line and combine it with the next line
        prev_warning = line
      else:
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
      continue

    if line_counter < 100:
      # save a little bit of time by only doing this for the first few lines
      line_counter += 1
      m = re.search('(?<=^PLATFORM_VERSION=).*', line)
      if m is not None:
        platform_version = m.group(0)
      m = re.search('(?<=^TARGET_PRODUCT=).*', line)
      if m is not None:
        target_product = m.group(0)
      m = re.search('(?<=^TARGET_BUILD_VARIANT=).*', line)
      if m is not None:
        target_variant = m.group(0)
      m = re.search('(?<=^TOP=).*', line)
      if m is not None:
        # The pattern has no capture group, so the value is the whole match;
        # asking for group(1) would raise IndexError.
        android_root = m.group(0)

  if prev_warning:
    # The log ended on a file-less warning with no following line to pair it
    # with; record it instead of silently dropping it.
    unique_warnings = add_normalized_line_to_warnings(
        'unknown_source_file: ' + prev_warning, flags, android_root,
        unique_warnings)

  if android_root:
    # The root may only have become known part-way through the log, so
    # normalize every collected warning again against the final root.
    new_unique_warnings = dict()
    for warning_line in unique_warnings:
      normalized_line = normalize_warning_line(warning_line, flags,
                                               android_root)
      new_unique_warnings[normalized_line] = generate_android_cs_link(
          warning_line, flags, android_root)
    unique_warnings = new_unique_warnings

  header_str = '%s - %s - %s' % (platform_version, target_product,
                                 target_variant)
  return unique_warnings, header_str
421
422
def parse_input_file(infile, flags):
  """Parse infile with the parser that belongs to flags.platform.

  Returns:
    The (unique_warnings, header_str) pair of the platform parser.

  Raises:
    RuntimeError: if the platform is neither 'chrome' nor 'android'.
  """
  platform = flags.platform
  if platform == 'android':
    return parse_input_file_android(infile, flags)
  if platform == 'chrome':
    return parse_input_file_chrome(infile, flags)
  raise RuntimeError(
      'parse_input_file not defined for platform %s' % flags.platform)
430
431
def parse_compiler_output(compiler_output):
  """Split 'path:line:col: message' compiler output into its four parts.

  Returns:
    (file_path, line_number, col_number, warning_message), with the two
    numbers converted to int. The message is everything after the third ':'
    and keeps its leading whitespace.
  """
  fields = compiler_output.split(':', 3)  # at most 3 splits -> 4 fields
  line_number = int(fields[1])
  # Only the text up to the first space of the third field is the column.
  col_number = int(fields[2].partition(' ')[0])
  return fields[0], line_number, col_number, fields[3]
440
441
def get_warn_patterns(platform):
  """Return the warn patterns of platform with their counters reset.

  Every returned pattern dict gets an empty 'members' list and an empty
  'projects' dict; the pattern dicts of the pattern modules are modified in
  place.

  Raises:
    Exception: if platform is neither 'chrome' nor 'android'.
  """
  if platform == 'android':
    warn_patterns = (make_patterns.warn_patterns +
                     cpp_patterns.warn_patterns +
                     java_patterns.warn_patterns +
                     tidy_patterns.warn_patterns +
                     other_patterns.warn_patterns)
  elif platform == 'chrome':
    warn_patterns = cpp_patterns.warn_patterns
  else:
    raise Exception('platform name %s is not valid' % platform)

  for pattern in warn_patterns:
    pattern['members'] = []
    # 'projects' maps a project name to the number of warnings of this
    # pattern in that project.
    pattern['projects'] = {}
  return warn_patterns
457
458
def get_project_list(platform):
  """Return the project list module data that belongs to platform.

  Raises:
    Exception: if platform is neither 'chrome' nor 'android'.
  """
  if platform == 'android':
    return android_project_list.project_list
  if platform == 'chrome':
    return chrome_project_list.project_list
  raise Exception('platform name %s is not valid' % platform)
466
467
def parallel_classify_warnings(warning_data, args, project_names,
                               project_patterns, warn_patterns,
                               use_google3, create_launch_subprocs_fn,
                               classify_warnings_fn):
  """Classify all warning lines, in parallel when args.processes > 1.

  Args:
    warning_data: dict mapping each warning line to its code search link.
    args: parsed flags; only args.processes is read.
    project_names: project names, indexed like project_patterns.
    project_patterns: compiled project regular expressions.
    warn_patterns: list of warn pattern dicts; the 'members' list and the
      'projects' counters of the matched patterns are updated in place.
    use_google3: when true, the value returned by create_launch_subprocs_fn
      is wrapped in one more list before it is walked.
    create_launch_subprocs_fn: called as fn(num_cpu, classify_warnings_fn,
      arg_groups, group_results) when more than one process is requested. Its
      return value is walked as groups -> results -> records.
    classify_warnings_fn: handed through to create_launch_subprocs_fn.

  Returns:
    (warning_messages, warning_links, warning_records), where each record is
    [pattern index, project index, message index, link index].
  """
  num_cpu = args.processes

  if num_cpu > 1:
    # Deal the warnings round-robin into one group per process.
    warning_groups = [[] for _ in range(num_cpu)]
    for i, item in enumerate(warning_data.items()):
      warning_groups[i % num_cpu].append(item)
    arg_groups = [[{
        'group': group,
        'project_patterns': project_patterns,
        'warn_patterns': warn_patterns,
        'num_processes': num_cpu
    }] for group in warning_groups]

    group_results = create_launch_subprocs_fn(num_cpu,
                                              classify_warnings_fn,
                                              arg_groups,
                                              [])
    if use_google3:
      group_results = [group_results]
  else:
    records = []
    for warning, link in warning_data.items():
      classify_one_warning(warning, link, records,
                           project_patterns, warn_patterns)
    # The loops below walk groups -> results -> records, so the single list
    # of records needs two levels of wrapping whatever use_google3 is. With
    # only one level the innermost loop would try to unpack the fields of a
    # record instead of the record itself.
    group_results = [[records]]

  warning_messages = []
  warning_links = []
  warning_records = []
  for group_result in group_results:
    for result in group_result:
      for line, link, pattern_idx, project_idx in result:
        pattern = warn_patterns[pattern_idx]
        pattern['members'].append(line)
        message_idx = len(warning_messages)
        warning_messages.append(line)
        link_idx = len(warning_links)
        warning_links.append(link)
        warning_records.append([pattern_idx, project_idx, message_idx,
                                link_idx])
        # Warnings outside every known project are counted under '???'.
        pname = '???' if project_idx < 0 else project_names[project_idx]
        # Count warnings by project.
        projects = pattern['projects']
        projects[pname] = projects.get(pname, 0) + 1
  return warning_messages, warning_links, warning_records
526
527
def process_log(logfile, flags, project_names, project_patterns, warn_patterns,
                html_path, use_google3, create_launch_subprocs_fn,
                classify_warnings_fn, logfile_object):
  """Parse a build log, classify its warnings and write the HTML report.

  This is isolated into its own function (rather than just taking place in
  main) so that it can be used by both warn.py and the borg job
  process_gs_logs.py, to avoid duplication of code.
  Note that if the arguments to this function change, process_gs_logs.py must
  be updated accordingly.

  Args:
    logfile: path of the log; opened as UTF-8 text, but only when
      logfile_object is None.
    flags: parsed command-line flags.
    project_names: project names, indexed like project_patterns.
    project_patterns: compiled project regular expressions.
    warn_patterns: list of warn pattern dicts.
    html_path: passed through to html_writer.write_html.
    use_google3: passed through to parallel_classify_warnings.
    create_launch_subprocs_fn: passed through to parallel_classify_warnings.
    classify_warnings_fn: passed through to parallel_classify_warnings.
    logfile_object: already opened log to parse instead of logfile, or None.

  Returns:
    (warning_messages, warning_links, warning_records, header_str).
  """
  if logfile_object is not None:
    parsed = parse_input_file(logfile_object, flags)
  else:
    with io.open(logfile, encoding='utf-8') as log:
      parsed = parse_input_file(log, flags)
  warning_lines_and_links, header_str = parsed

  classified = parallel_classify_warnings(
      warning_lines_and_links, flags, project_names, project_patterns,
      warn_patterns, use_google3, create_launch_subprocs_fn,
      classify_warnings_fn)
  warning_messages, warning_links, warning_records = classified

  html_writer.write_html(flags, project_names, warn_patterns, html_path,
                         warning_messages, warning_links, warning_records,
                         header_str)

  return warning_messages, warning_links, warning_records, header_str
557
558
def common_main(use_google3, create_launch_subprocs_fn, classify_warnings_fn,
                logfile_object=None):
  """Shared main function for Google3 and non-Google3 versions of warn.py.

  Parses the command line, processes the log and hands the results to
  html_writer.write_out_csv.

  Returns:
    (flags, warning_messages, warning_records, warn_patterns), so that the
    caller can use them, if desired.
  """
  flags = parse_args(use_google3)
  platform = flags.platform
  warn_patterns = get_warn_patterns(platform)
  project_list = get_project_list(platform)

  project_names = get_project_names(project_list)
  project_patterns = [re.compile(entry[1]) for entry in project_list]

  # html_path=None because we output html below if not outputting CSV
  log_results = process_log(
      logfile=flags.log,
      flags=flags,
      project_names=project_names,
      project_patterns=project_patterns,
      warn_patterns=warn_patterns,
      html_path=None,
      use_google3=use_google3,
      create_launch_subprocs_fn=create_launch_subprocs_fn,
      classify_warnings_fn=classify_warnings_fn,
      logfile_object=logfile_object)
  warning_messages, warning_links, warning_records, header_str = log_results

  html_writer.write_out_csv(flags, warn_patterns, warning_messages,
                            warning_links, warning_records, header_str,
                            project_names)

  return flags, warning_messages, warning_records, warn_patterns
584