# python3
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Grep warnings messages and output HTML tables or warning counts in CSV.

Default is to output warnings in HTML tables grouped by warning severity.
Use option --byproject to output tables grouped by source file projects.
Use option --gencsv to output warning counts in CSV format.

Default input file is build.log, which can be changed with the --log flag.
"""

# List of important data structures and functions in this script.
#
# To parse and keep warning message in the input file:
#   severity:                classification of message severity
#   warn_patterns:
#   warn_patterns[w]['category']     tool that issued the warning, not used now
#   warn_patterns[w]['description']  table heading
#   warn_patterns[w]['members']      matched warnings from input
#   warn_patterns[w]['patterns']     regular expressions to match warnings
#   warn_patterns[w]['projects'][p]  number of warnings of pattern w in p
#   warn_patterns[w]['severity']     severity tuple
#   project_list[p][0]               project name
#   project_list[p][1]               regular expression to match a project path
#   project_patterns[p]              re.compile(project_list[p][1])
#   project_names[p]                 project_list[p][0]
#   warning_messages     array of each warning message, without source url
#   warning_links        array of each warning code search link; for 'chrome'
#   warning_records      array of [idx to warn_patterns,
#                                  idx to project_names,
#                                  idx to warning_messages,
#                                  idx to warning_links]
#   parse_input_file
#
import argparse
import io
import multiprocessing
import os
import re
import sys

# pylint:disable=relative-beyond-top-level
# pylint:disable=g-importing-member
from . import android_project_list
from . import chrome_project_list
from . import cpp_warn_patterns as cpp_patterns
from . import html_writer
from . import java_warn_patterns as java_patterns
from . import make_warn_patterns as make_patterns
from . import other_warn_patterns as other_patterns
from . import tidy_warn_patterns as tidy_patterns


def parse_args(use_google3):
  """Define and parse the args. Return the parse_args() result.

  Args:
    use_google3: when true, the check that the log file exists on the local
      file system is skipped.

  Returns:
    The argparse namespace. Its 'log' attribute is the --log value if given,
    otherwise the positional build.log argument (default 'build.log').

  Exits the process with an error message if the log file cannot be found
  (only checked when use_google3 is false).
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument('--capacitor_path', default='',
                      help='Save capacitor warning file to the passed absolute'
                      ' path')
  # csvpath has a different naming than the above path because historically the
  # original Android script used csvpath, so other scripts rely on it
  parser.add_argument('--csvpath', default='',
                      help='Save CSV warning file to the passed path')
  parser.add_argument('--gencsv', action='store_true',
                      help='Generate CSV file with number of various warnings')
  parser.add_argument('--byproject', action='store_true',
                      help='Separate warnings in HTML output by project names')
  parser.add_argument('--url', default='',
                      help='Root URL of an Android source code tree prefixed '
                      'before files in warnings')
  parser.add_argument('--separator', default='?l=',
                      help='Separator between the end of a URL and the line '
                      'number argument. e.g. #')
  parser.add_argument('--processes', default=multiprocessing.cpu_count(),
                      type=int,
                      help='Number of parallel processes to process warnings')
  # Old Android build scripts call warn.py without --platform,
  # so the default platform is set to 'android'.
  parser.add_argument('--platform', default='android',
                      choices=['chrome', 'android'],
                      help='Platform of the build log')
  # Old Android build scripts call warn.py with only a build.log file path.
  parser.add_argument('--log', help='Path to build log file')
  parser.add_argument(dest='buildlog', metavar='build.log',
                      default='build.log', nargs='?',
                      help='Path to build.log file')
  flags = parser.parse_args()
  if not flags.log:
    flags.log = flags.buildlog
  if not use_google3 and not os.path.exists(flags.log):
    sys.exit('Cannot find log file: ' + flags.log)
  return flags


def get_project_names(project_list):
  """Get project_names from project_list."""
  return [p[0] for p in project_list]


def find_project_index(line, project_patterns):
  """Return the index of the first pattern matching line, or -1 if none."""
  for i, p in enumerate(project_patterns):
    if p.match(line):
      return i
  return -1


def classify_one_warning(warning, link, results, project_patterns,
                         warn_patterns):
  """Classify one warning line.

  Appends [warning, link, warn_pattern index, project index] to results for
  the first warn pattern with a compiled pattern matching the warning.
  Nothing is appended when no pattern matches.

  Args:
    warning: the warning line to classify.
    link: the code search link stored alongside the warning.
    results: list that receives the classification record (mutated in place).
    project_patterns: compiled project regular expressions.
    warn_patterns: list of warn pattern dicts; each must provide
      'compiled_patterns'.
  """
  for i, w in enumerate(warn_patterns):
    for cpat in w['compiled_patterns']:
      if cpat.match(warning):
        p = find_project_index(warning, project_patterns)
        results.append([warning, link, i, p])
        return
  # If we end up here, there was a problem parsing the log
  # probably caused by 'make -j' mixing the output from
  # 2 or more concurrent compiles. The warning is deliberately ignored.


def remove_prefix(s, sub):
  """Remove everything before last occurrence of substring sub in string s.

  The returned string still starts with sub. If sub does not occur in s,
  s is returned unchanged.
  """
  last = s.rfind(sub)
  if last < 0:
    return s
  return s[last:]


# TODO(emmavukelj): Don't have any generate_*_cs_link functions call
# normalize_path a second time (the first time being in parse_input_file)
def generate_cs_link(warning_line, flags, android_root=None):
  """Generate the code search link for a warning line of flags.platform."""
  if flags.platform == 'chrome':
    return generate_chrome_cs_link(warning_line, flags)
  if flags.platform == 'android':
    return generate_android_cs_link(warning_line, flags, android_root)
  return 'https://cs.corp.google.com/'


def generate_android_cs_link(warning_line, flags, android_root):
  """Generate the code search link for a warning line in Android.

  Args:
    warning_line: a line of the form 'path:line_or_text:rest'; it must
      contain at least two ':' characters.
    flags: parsed flags; 'url' and 'separator' are used to build the link.
    android_root: optional source root prefix removed from the path.

  Returns:
    The normalized path when flags.url is empty; otherwise
    flags.url + '/' + path, followed by flags.separator and the line number
    when the second field is numeric.
  """
  # max_splits=2 -> only 3 items
  raw_path, line_number_str, _ = warning_line.split(':', 2)
  normalized_path = normalize_path(raw_path, flags, android_root)
  if not flags.url:
    return normalized_path
  link_path = flags.url + '/' + normalized_path
  if line_number_str.isdigit():
    link_path += flags.separator + line_number_str
  return link_path


def generate_chrome_cs_link(warning_line, flags):
  """Generate the code search link for a warning line in Chrome.

  Args:
    warning_line: a line of the form 'path:line_number:...'.
    flags: parsed flags, passed on to normalize_path.

  Returns:
    A link to the file and line when the path falls under one of the known
    directory cases, otherwise a code search query link for the file.
  """
  split_line = warning_line.split(':')
  raw_path = split_line[0]
  normalized_path = normalize_path(raw_path, flags)
  link_base = 'https://cs.chromium.org/'
  link_add = 'chromium'
  link_path = None

  # Basically just going through a few specific directory cases and specifying
  # the proper behavior for that case. This list of cases was accumulated
  # through trial and error manually going through the warnings.
  #
  # This code pattern of using case-specific "if"s instead of "elif"s looks
  # possibly accidental and mistaken but it is intentional because some paths
  # fall under several cases (e.g. third_party/lib/nghttp2_frame.c) and for
  # those we want the most specific case to be applied. If there is reliable
  # knowledge of exactly where these occur, this could be changed to "elif"s
  # but there is no reliable set of paths falling under multiple cases at the
  # moment.
  if '/src/third_party' in raw_path:
    link_path = remove_prefix(raw_path, '/src/third_party/')
  if '/chrome_root/src_internal/' in raw_path:
    link_path = remove_prefix(raw_path, '/chrome_root/src_internal/')
    link_path = link_path[len('/chrome_root'):]  # remove chrome_root
  if '/chrome_root/src/' in raw_path:
    link_path = remove_prefix(raw_path, '/chrome_root/src/')
    link_path = link_path[len('/chrome_root'):]  # remove chrome_root
  if '/libassistant/' in raw_path:
    link_add = 'eureka_internal/chromium/src'
    link_base = 'https://cs.corp.google.com/'  # internal data
    link_path = remove_prefix(normalized_path, '/libassistant/')
  if raw_path.startswith('gen/'):
    link_path = '/src/out/Debug/gen/' + normalized_path
  if '/gen/' in raw_path:
    return '%s?q=file:%s' % (link_base, remove_prefix(normalized_path, '/gen/'))

  if not link_path and (raw_path.startswith('src/') or
                        raw_path.startswith('src_internal/')):
    link_path = '/%s' % raw_path

  if not link_path:  # can't find specific link, send a query
    return '%s?q=file:%s' % (link_base, normalized_path)

  line_number = int(split_line[1])
  link = '%s%s%s?l=%d' % (link_base, link_add, link_path, line_number)
  return link


def find_warn_py_and_android_root(path):
  """Return android source root path if warn.py is found.

  Walks the ancestors of path from the deepest to the shallowest and returns
  the first one that contains build/make/tools/warn.py, or '' if none does.
  """
  parts = path.split('/')
  for idx in reversed(range(2, len(parts))):
    root_path = '/'.join(parts[:idx])
    # Android root directory should contain this script.
    if os.path.exists(root_path + '/build/make/tools/warn.py'):
      return root_path
  return ''


def find_android_root(buildlog):
  """Guess android source root from common prefix of file paths.

  Args:
    buildlog: iterable of log lines. It is consumed by this function; callers
      that need to read it again must rewind it themselves.

  Returns:
    The guessed root path without trailing '/', or '' if it cannot be guessed.
  """
  # Use the longest common prefix of the absolute file paths
  # of the first 10000 warning messages as the android_root.
  warning_lines = []
  warning_pattern = re.compile('^/[^ ]*/[^ ]*: warning: .*')
  count = 0
  for line in buildlog:
    if warning_pattern.match(line):
      warning_lines.append(line)
      count += 1
      if count > 9999:
        break
      # Try to find warn.py and use its location to find
      # the source tree root.
      if count < 100:
        path = os.path.normpath(re.sub(':.*$', '', line))
        android_root = find_warn_py_and_android_root(path)
        if android_root:
          return android_root
  # Do not use common prefix of a small number of paths.
  if count > 10:
    # pytype: disable=wrong-arg-types
    root_path = os.path.commonprefix(warning_lines)
    # pytype: enable=wrong-arg-types
    if len(root_path) > 2 and root_path.endswith('/'):
      return root_path[:-1]
  return ''


def remove_android_root_prefix(path, android_root):
  """Remove android_root prefix from path if it is found.

  The character following the prefix (expected to be '/') is removed too.
  """
  if path.startswith(android_root):
    return path[1 + len(android_root):]
  return path


def normalize_path(path, flags, android_root=None):
  """Normalize file path relative to src/ or src-internal/ directory.

  For the 'android' platform the android_root prefix, if given, is removed.
  For other platforms everything up to and including the first
  'chrome_root/' is removed.
  """
  path = os.path.normpath(path)

  if flags.platform == 'android':
    if android_root:
      return remove_android_root_prefix(path, android_root)
    return path

  # Remove known prefix of root path and normalize the suffix.
  idx = path.find('chrome_root/')
  if idx >= 0:
    # remove chrome_root/, we want path relative to that
    return path[idx + len('chrome_root/'):]
  return path


def normalize_warning_line(line, flags, android_root=None):
  """Normalize file path relative to src directory in a warning line.

  Curly single quotes are replaced by "'", other non-ASCII characters by
  spaces, and surrounding whitespace is stripped before the path (the text
  before the first ':') is normalized.
  """
  line = re.sub(u'[\u2018\u2019]', '\'', line)
  # replace non-ASCII chars to spaces
  line = re.sub(u'[^\x00-\x7f]', ' ', line)
  line = line.strip()
  first_column = line.find(':')
  return normalize_path(line[:first_column], flags,
                        android_root) + line[first_column:]


def parse_input_file_chrome(infile, flags):
  """Parse Chrome input file, collect parameters and warning lines.

  Args:
    infile: iterable of log lines.
    flags: parsed flags.

  Returns:
    A tuple (unique_warnings, header_str). unique_warnings maps each
    normalized warning line to its code search link; header_str is
    'platform_version - board_name - architecture', with 'unknown' for
    values not found in the log.
  """
  platform_version = 'unknown'
  board_name = 'unknown'
  architecture = 'unknown'

  # only handle warning lines of format 'file_path:line_no:col_no: warning: ...'
  chrome_warning_pattern = r'^[^ ]*/[^ ]*:[0-9]+:[0-9]+: warning: .*'

  warning_pattern = re.compile(chrome_warning_pattern)

  # Collect all unique warning lines
  # Remove the duplicated warnings save ~8% of time when parsing
  # one typical build log than before
  unique_warnings = dict()
  for line in infile:
    if warning_pattern.match(line):
      normalized_line = normalize_warning_line(line, flags)
      if normalized_line not in unique_warnings:
        unique_warnings[normalized_line] = generate_cs_link(line, flags)
    elif (platform_version == 'unknown' or board_name == 'unknown' or
          architecture == 'unknown'):
      m = re.match(r'.+Package:.+chromeos-base/chromeos-chrome-', line)
      if m is not None:
        platform_version = 'R' + line.split('chrome-')[1].split('_')[0]
        continue
      m = re.match(r'.+Source\sunpacked\sin\s(.+)', line)
      if m is not None:
        board_name = m.group(1).split('/')[2]
        continue
      m = re.match(r'.+USE:\s*([^\s]*).*', line)
      if m is not None:
        architecture = m.group(1)
        continue

  header_str = '%s - %s - %s' % (platform_version, board_name, architecture)
  return unique_warnings, header_str


def add_normalized_line_to_warnings(line, flags, android_root, unique_warnings):
  """Parse/normalize path, updating warning line and add to warnings dict.

  unique_warnings is updated in place (first occurrence wins) and returned.
  """
  normalized_line = normalize_warning_line(line, flags, android_root)
  if normalized_line not in unique_warnings:
    unique_warnings[normalized_line] = generate_cs_link(line, flags,
                                                        android_root)
  return unique_warnings


def parse_input_file_android(infile, flags):
  """Parse Android input file, collect parameters and warning lines.

  Args:
    infile: seekable file object of the build log. It is read once to guess
      the android root, rewound with seek(0), and read again.
    flags: parsed flags.

  Returns:
    A tuple (unique_warnings, header_str). unique_warnings maps each
    normalized warning line to its code search link; header_str is
    'platform_version - target_product - target_variant', with 'unknown' for
    values not found in the log.
  """
  platform_version = 'unknown'
  target_product = 'unknown'
  target_variant = 'unknown'
  android_root = find_android_root(infile)
  infile.seek(0)

  # rustc warning messages have two lines that should be combined:
  #     warning: description
  #        --> file_path:line_number:column_number
  # Some warning messages have no file name:
  #     warning: macro replacement list ... [bugprone-macro-parentheses]
  # Some makefile warning messages have no line number:
  #     some/path/file.mk: warning: description
  # C/C++ compiler warning messages have line and column numbers:
  #     some/path/file.c:line_number:column_number: warning: description
  warning_pattern = re.compile('(^[^ ]*/[^ ]*: warning: .*)|(^warning: .*)')
  warning_without_file = re.compile('^warning: .*')
  rustc_file_position = re.compile('^[ ]+--> [^ ]*/[^ ]*:[0-9]+:[0-9]+')

  # Collect all unique warning lines
  # Remove the duplicated warnings save ~8% of time when parsing
  # one typical build log than before
  unique_warnings = dict()
  line_counter = 0
  prev_warning = ''
  for line in infile:
    if prev_warning:
      if rustc_file_position.match(line):
        # must be a rustc warning, combine 2 lines into one warning
        line = line.strip().replace('--> ', '') + ': ' + prev_warning
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
        prev_warning = ''
        continue
      # add prev_warning, and then process the current line
      prev_warning = 'unknown_source_file: ' + prev_warning
      unique_warnings = add_normalized_line_to_warnings(
          prev_warning, flags, android_root, unique_warnings)
      prev_warning = ''

    if warning_pattern.match(line):
      if warning_without_file.match(line):
        # save this line and combine it with the next line
        prev_warning = line
      else:
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
      continue

    if line_counter < 100:
      # save a little bit of time by only doing this for the first few lines
      line_counter += 1
      m = re.search('(?<=^PLATFORM_VERSION=).*', line)
      if m is not None:
        platform_version = m.group(0)
      m = re.search('(?<=^TARGET_PRODUCT=).*', line)
      if m is not None:
        target_product = m.group(0)
      m = re.search('(?<=^TARGET_BUILD_VARIANT=).*', line)
      if m is not None:
        target_variant = m.group(0)
      m = re.search('(?<=^TOP=).*', line)
      if m is not None:
        # The pattern has no capture group, so the value is the whole match.
        android_root = m.group(0)

  if prev_warning:
    # The log ended with a warning that has no file name; there is no next
    # line to combine it with, so record it like the loop does.
    unique_warnings = add_normalized_line_to_warnings(
        'unknown_source_file: ' + prev_warning, flags, android_root,
        unique_warnings)

  if android_root:
    # android_root may have been updated from a TOP= line after some warnings
    # were recorded, so normalize all recorded lines against the final value.
    new_unique_warnings = dict()
    for warning_line in unique_warnings:
      normalized_line = normalize_warning_line(warning_line, flags,
                                               android_root)
      new_unique_warnings[normalized_line] = generate_android_cs_link(
          warning_line, flags, android_root)
    unique_warnings = new_unique_warnings

  header_str = '%s - %s - %s' % (platform_version, target_product,
                                 target_variant)
  return unique_warnings, header_str


def parse_input_file(infile, flags):
  """Parse the input file with the parser for flags.platform.

  Raises:
    RuntimeError: if flags.platform is neither 'chrome' nor 'android'.
  """
  if flags.platform == 'chrome':
    return parse_input_file_chrome(infile, flags)
  if flags.platform == 'android':
    return parse_input_file_android(infile, flags)
  raise RuntimeError('parse_input_file not defined for platform %s' %
                     flags.platform)


def parse_compiler_output(compiler_output):
  """Parse compiler output for relevant info.

  Args:
    compiler_output: a string of the form 'path:line:column: message'.

  Returns:
    A tuple (file_path, line_number, col_number, warning_message) where the
    numbers are ints and warning_message is everything after the third ':'.
  """
  split_output = compiler_output.split(':', 3)  # 3 = max splits
  file_path = split_output[0]
  line_number = int(split_output[1])
  col_number = int(split_output[2].split(' ')[0])
  warning_message = split_output[3]
  return file_path, line_number, col_number, warning_message


def get_warn_patterns(platform):
  """Get and initialize warn_patterns.

  Every returned pattern gets an empty 'members' list and an empty 'projects'
  dict.

  Raises:
    Exception: if platform is neither 'chrome' nor 'android'.
  """
  if platform == 'chrome':
    warn_patterns = cpp_patterns.warn_patterns
  elif platform == 'android':
    warn_patterns = (make_patterns.warn_patterns +
                     cpp_patterns.warn_patterns +
                     java_patterns.warn_patterns +
                     tidy_patterns.warn_patterns +
                     other_patterns.warn_patterns)
  else:
    raise Exception('platform name %s is not valid' % platform)
  for w in warn_patterns:
    w['members'] = []
    # Each warning pattern has a 'projects' dictionary, that
    # maps a project name to number of warnings in that project.
    w['projects'] = {}
  return warn_patterns


def get_project_list(platform):
  """Return project list for appropriate platform."""
  if platform == 'chrome':
    return chrome_project_list.project_list
  if platform == 'android':
    return android_project_list.project_list
  raise Exception('platform name %s is not valid' % platform)


def parallel_classify_warnings(warning_data, args, project_names,
                               project_patterns, warn_patterns,
                               use_google3, create_launch_subprocs_fn,
                               classify_warnings_fn):
  """Classify all warning lines with num_cpu parallel processes.

  Args:
    warning_data: dict mapping warning line to code search link.
    args: parsed flags; 'processes' selects the number of processes.
    project_names: list of project names, indexed like project_patterns.
    project_patterns: compiled project regular expressions.
    warn_patterns: list of warn pattern dicts; their 'members' and 'projects'
      entries are updated in place.
    use_google3: when true, the collected results get one more level of
      list nesting before they are consumed.
    create_launch_subprocs_fn: called as fn(num_cpu, classify_warnings_fn,
      arg_groups, group_results) when more than one process is requested;
      its return value is used as the grouped results.
    classify_warnings_fn: forwarded to create_launch_subprocs_fn.

  Returns:
    A tuple (warning_messages, warning_links, warning_records).
  """
  num_cpu = args.processes
  group_results = []

  if num_cpu > 1:
    # set up parallel processing for this...
    warning_groups = [[] for _ in range(num_cpu)]
    i = 0
    for warning, link in warning_data.items():
      warning_groups[i].append((warning, link))
      i = (i + 1) % num_cpu
    arg_groups = [[] for _ in range(num_cpu)]
    for i, group in enumerate(warning_groups):
      arg_groups[i] = [{
          'group': group,
          'project_patterns': project_patterns,
          'warn_patterns': warn_patterns,
          'num_processes': num_cpu
      }]

    group_results = create_launch_subprocs_fn(num_cpu,
                                              classify_warnings_fn,
                                              arg_groups,
                                              group_results)
  else:
    single_group = []
    for warning, link in warning_data.items():
      classify_one_warning(warning, link, single_group,
                           project_patterns, warn_patterns)
    # The loops below walk three levels of lists before reaching a record.
    # With use_google3 one level is added below, so add only one here;
    # otherwise add both levels here so records are not unpacked one level
    # too deep.
    group_results = [single_group] if use_google3 else [[single_group]]

  warning_messages = []
  warning_links = []
  warning_records = []
  if use_google3:
    group_results = [group_results]
  for group_result in group_results:
    for result in group_result:
      for line, link, pattern_idx, project_idx in result:
        pattern = warn_patterns[pattern_idx]
        pattern['members'].append(line)
        message_idx = len(warning_messages)
        warning_messages.append(line)
        link_idx = len(warning_links)
        warning_links.append(link)
        warning_records.append([pattern_idx, project_idx, message_idx,
                                link_idx])
        pname = '???' if project_idx < 0 else project_names[project_idx]
        # Count warnings by project.
        projects = pattern['projects']
        projects[pname] = projects.get(pname, 0) + 1
  return warning_messages, warning_links, warning_records


def process_log(logfile, flags, project_names, project_patterns, warn_patterns,
                html_path, use_google3, create_launch_subprocs_fn,
                classify_warnings_fn, logfile_object):
  # pylint: disable=g-doc-args
  # pylint: disable=g-doc-return-or-yield
  """Function that handles processing of a log.

  This is isolated into its own function (rather than just taking place in main)
  so that it can be used by both warn.py and the borg job process_gs_logs.py, to
  avoid duplication of code.
  Note that if the arguments to this function change, process_gs_logs.py must
  be updated accordingly.

  The log is read from logfile_object when it is not None, otherwise from the
  file at path logfile (opened as UTF-8). The warnings are parsed, classified
  and passed to html_writer.write_html; the function returns
  (warning_messages, warning_links, warning_records, header_str).
  """
  if logfile_object is None:
    with io.open(logfile, encoding='utf-8') as log:
      warning_lines_and_links, header_str = parse_input_file(log, flags)
  else:
    warning_lines_and_links, header_str = parse_input_file(
        logfile_object, flags)
  warning_messages, warning_links, warning_records = parallel_classify_warnings(
      warning_lines_and_links, flags, project_names, project_patterns,
      warn_patterns, use_google3, create_launch_subprocs_fn,
      classify_warnings_fn)

  html_writer.write_html(flags, project_names, warn_patterns, html_path,
                         warning_messages, warning_links, warning_records,
                         header_str)

  return warning_messages, warning_links, warning_records, header_str


def common_main(use_google3, create_launch_subprocs_fn, classify_warnings_fn,
                logfile_object=None):
  """Shared main function for Google3 and non-Google3 versions of warn.py.

  Parses the command line, processes the log, and hands the results to
  html_writer.write_out_csv.

  Returns:
    A tuple (flags, warning_messages, warning_records, warn_patterns).
  """
  flags = parse_args(use_google3)
  warn_patterns = get_warn_patterns(flags.platform)
  project_list = get_project_list(flags.platform)

  project_names = get_project_names(project_list)
  project_patterns = [re.compile(p[1]) for p in project_list]

  # html_path=None because we output html below if not outputting CSV
  warning_messages, warning_links, warning_records, header_str = process_log(
      logfile=flags.log, flags=flags, project_names=project_names,
      project_patterns=project_patterns, warn_patterns=warn_patterns,
      html_path=None, use_google3=use_google3,
      create_launch_subprocs_fn=create_launch_subprocs_fn,
      classify_warnings_fn=classify_warnings_fn,
      logfile_object=logfile_object)

  html_writer.write_out_csv(flags, warn_patterns, warning_messages,
                            warning_links, warning_records, header_str,
                            project_names)

  # Return these values, so that caller can use them, if desired.
  return flags, warning_messages, warning_records, warn_patterns