#!/usr/bin/env python2
#
# Copyright 2020 - The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tool to generate data for smoke tests."""

from __future__ import print_function

import collections
import datetime
import gzip
import operator
import os
import re
import tempfile

import jinja2
import util

ANDROID_REPOSITORY_ROOT = util.android_repository_root()
LIBCORE_DIR = os.path.join(ANDROID_REPOSITORY_ROOT, 'libcore')
LOGS_DIR = os.path.join(LIBCORE_DIR, 'smoketest')
REQUIRED_RUNS = 3
CTS_LOG_LINE_PATTERN = (r'(\d+)-(\d+) +(\d+):(\d+):(\d+)\.(\d+) +\d+ +\d+ +'
                        r'[^ ]+ (CtsTestRunListener|TestRunner) *: *(.+)')
THIS_YEAR = datetime.datetime.now().year
START_MATCHER = ('CtsTestRunListener', r'Now executing\s*:\s*(.+)')
TESTS_RAN_MATCHER = ('TestRunner', r'started\s*:.*')
END_MATCHER = ('CtsTestRunListener', r'Total memory\s*:.+')
HARD_END_MATCHER = ('TestRunner', r'run finished\s*:.+')
JAVA_CLASS_PATTERN = (r'(?:([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)\.)?'
                      r'[a-zA-Z_][a-zA-Z0-9_]*')
MAX_RUN_TIME = datetime.timedelta(minutes=10)
OUT_DIR = tempfile.mkdtemp()
ROLL_UP_TEST_CLASSES_TO_PACKAGE = False
CTS_MODULE_NAME = 'CtsLibcoreTestCases'

TEST_MAPPING_TEMPLATE = jinja2.Template("""
{
  "presubmit": [
    {
      "name": "{{module}}",
      "options": [{% for test in exclusions %}
        {
          "exclude-filter": "{{test}}"
        }{% if not loop.last %},{% endif %}{% endfor %}
      ]
    }
  ]
}
""".strip())


def find_all_log_files():
  """Returns a list of the log files to read."""
  if not os.path.isdir(LOGS_DIR):
    raise Exception('Could not find logs directory ' + LOGS_DIR)
  filenames = os.listdir(LOGS_DIR)
  if not filenames:
    raise Exception('Found empty logs directory ' + LOGS_DIR)
  if len(filenames) != REQUIRED_RUNS:
    raise Exception('Expected to find exactly %d files in %s, found %d' %
                    (REQUIRED_RUNS, LOGS_DIR, len(filenames)))
  return map(lambda f: os.path.join(LOGS_DIR, f), filenames)


def read_cts_logs(filename):
  """Read CTS entries from a log file.

  Args:
    filename: The name of the file to read.

  Yields:
    Tuples of timestamps (as datetimes), log sources, and log messages.
  """
  print('Reading ' + util.printable_path(filename))
  with gzip.open(filename, mode='rt') as log_file:
    for line in log_file:
      cts_match = re.match(CTS_LOG_LINE_PATTERN, line)
      if cts_match:
        assert len(cts_match.groups()) == 8
        timestamp = datetime.datetime(
            year=THIS_YEAR,
            month=int(cts_match.group(1)),
            day=int(cts_match.group(2)),
            hour=int(cts_match.group(3)),
            minute=int(cts_match.group(4)),
            second=int(cts_match.group(5)),
            microsecond=1000 * int(cts_match.group(6)))
        source = cts_match.group(7)
        message = cts_match.group(8)
        yield (timestamp, source, message)
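

# For illustration only: a hypothetical logcat line of the shape that
# CTS_LOG_LINE_PATTERN is intended to match (the field widths and values here
# are invented):
#
#   01-30 14:03:12.345  1234  5678 I CtsTestRunListener: Now executing : libcore.SomeTest
#
# For such a line, groups 1-6 are the timestamp fields (month, day, hour,
# minute, second, milliseconds), group 7 is the log tag ('CtsTestRunListener'
# or 'TestRunner') and group 8 is the message, which the parser then checks
# against START_MATCHER, TESTS_RAN_MATCHER, END_MATCHER and HARD_END_MATCHER.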


def match(matcher, got_source, message):
  """Check whether a log line matches requirements.

  Args:
    matcher: A pair of the required log source and a pattern to match messages.
    got_source: The actual log source.
    message: The actual message.

  Returns:
    A MatchObject from the pattern match against the message, or None.
  """
  (want_source, pattern) = matcher
  if got_source == want_source:
    return re.match(pattern, message)
  else:
    return None


def parse_running_times(filename):
  """Parse the running times from a log file.

  Args:
    filename: The name of the file to read.

  Yields:
    Pairs of test names and running times (as timedeltas). Also emits the
    overall elapsed time from the start of the first test to the end of the
    last test with the key 'OVERALL'.

  Raises:
    Exception: The log file entries didn't look like we expected.
  """
  executing = None
  start_timestamp = None
  ran_tests = False
  overall_start_timestamp = None
  overall_end_timestamp = None
  for (timestamp, source, message) in read_cts_logs(filename):
    start_match = match(START_MATCHER, source, message)
    tests_ran_match = match(TESTS_RAN_MATCHER, source, message)
    end_match = match(END_MATCHER, source, message)
    hard_end_match = match(HARD_END_MATCHER, source, message)
    if not executing:
      if start_match:
        assert len(start_match.groups()) == 1
        executing = start_match.group(1)
        start_timestamp = timestamp
        if not overall_start_timestamp:
          overall_start_timestamp = timestamp
    else:
      if start_match:
        raise Exception(
            'Found start for %s while waiting for end for %s at %s in %s' %
            (start_match.group(1), executing, str(timestamp), filename))
      if tests_ran_match:
        ran_tests = True
      if end_match or hard_end_match:
        running_time = timestamp - start_timestamp
        # We see two types of execution in the logs. One appears to be some kind
        # of dry run which doesn't actually run any tests (and completes
        # significantly faster). We want the one that actually runs tests.
        if ran_tests:
          yield (executing, running_time)
        executing = None
        start_timestamp = None
        ran_tests = False
        overall_end_timestamp = timestamp
  if executing:
    raise Exception('Reached EOF while waiting for end for %s in %s' %
                    (executing, filename))
  yield ('OVERALL', overall_end_timestamp - overall_start_timestamp)


def collect_running_times(filenames):
  """Collect running times from the log files.

  Args:
    filenames: The names of the files to read.

  Returns:
    A tuple containing (1) a dictionary mapping test names to lists of running
    times (as timedeltas), and (2) a list of overall running times (i.e.
    elapsed times from the start of the first test to the end of the last
    test).
  """
  times_by_test = collections.defaultdict(list)
  output_path = os.path.join(OUT_DIR, 'test_times.txt')
  overall_times = []
  with open(output_path, 'w') as output:
    for filename in filenames:
      for (test_name, time) in parse_running_times(filename):
        output.write('%s: %g ms\n' % (test_name, time.total_seconds() * 1000))
        if test_name == 'OVERALL':
          overall_times.append(time)
        else:
          times_by_test[test_name].append(time)
  print('Wrote test times to ' + util.printable_path(output_path))
  return (times_by_test, overall_times)


def process_running_times(times_by_test):
  """Processes the collected running times.

  Args:
    times_by_test: A dictionary mapping test names to lists of running times.

  Returns:
    A dictionary mapping test names to fastest running times.
  """
  for (test, times) in times_by_test.iteritems():
    if len(times) != REQUIRED_RUNS:
      print('Warning: Only found %d runs for %s' % (len(times), test))
  return {test: min(times) for (test, times) in times_by_test.iteritems()}
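

# Purely illustrative (test names and durations are invented): with
# REQUIRED_RUNS = 3, the collected and processed data look roughly like
#
#   times_by_test = {
#       'libcore.java.util.ExampleTest': [timedelta(seconds=9),
#                                         timedelta(seconds=11),
#                                         timedelta(seconds=10)],
#       ...
#   }
#   overall_times = [timedelta(minutes=40), timedelta(minutes=42),
#                    timedelta(minutes=41)]
#   fastest_time_by_test = {'libcore.java.util.ExampleTest':
#                               timedelta(seconds=9), ...}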


def calculate_overhead_ratio(overall_times, fastest_time_by_test):
  """Calculates a ratio for the actual overall time to the sum of test times.

  The actual overall times are the elapsed times from the start of the first
  test to the end of the last test. The average of these is used. The ratio is
  taken with the sum of the fastest time for each test (which is what will be
  used for scoring).

  Args:
    overall_times: A list of overall running times.
    fastest_time_by_test: A dictionary mapping test names to fastest running
      times.

  Returns:
    The ratio.
  """
  average_overall_time = sum(overall_times,
                             datetime.timedelta(0)) / len(overall_times)
  total_time_by_test = sum(fastest_time_by_test.values(), datetime.timedelta(0))
  ratio = (
      average_overall_time.total_seconds() / total_time_by_test.total_seconds())
  print(
      'Average time for run is %g seconds, sum of fastest test times is %g, ratio is %g'
      % (average_overall_time.total_seconds(),
         total_time_by_test.total_seconds(), ratio))
  # N.B. Possible future enhancement: Currently, because we take the fastest of
  # three runs, a real run will always be slightly slower than we predict. We
  # could multiply in another overhead factor to account for this, e.g. by
  # looking at the ratio of the mean of the three to the fastest of the three.
  # (This factor should be applied globally rather than individually to each
  # test so as not to penalize tests which happened to have a slow run or two.)
  # This is not a high priority since for now we can just set MAX_RUN_TIME a bit
  # low to allow for this.
  return ratio


def get_parent(name):
  """Returns the parent of a Java class or package name."""
  class_match = re.match(JAVA_CLASS_PATTERN, name)
  if not class_match:
    raise Exception('Could not parse Java class name ' + name)
  assert len(class_match.groups()) == 1
  return class_match.group(1)


def group_times_by_package(times_by_test):
  """Groups the test classes by package name, summing the times.

  Args:
    times_by_test: A dictionary mapping test names to fastest running times.

  Returns:
    A dictionary mapping package names to total fastest running times.
  """
  time_by_package = collections.defaultdict(datetime.timedelta)
  for (clazz, time) in times_by_test.iteritems():
    package = get_parent(clazz)
    if package:  # A few tests have no package. They're weird, let's skip them.
      time_by_package[package] = time_by_package[package] + time
  output_path = os.path.join(OUT_DIR, 'package_times.txt')
  with open(output_path, 'w') as output:
    for (package, time) in time_by_package.iteritems():
      output.write('%s: %s ms\n' % (package, time.total_seconds() * 1000.0))
  print('Wrote package times to ' + util.printable_path(output_path))
  return time_by_package


def find_tests_to_run(time_by_test, overhead_ratio):
  """Finds the tests to actually run.

  The tests chosen will be the fastest set such that their total time, when
  multiplied by the overhead ratio, is less than the maximum.

  Args:
    time_by_test: A dictionary mapping test names to total fastest running
      times. The names can be packages, classes, or a mixture of the two.
    overhead_ratio: A ratio for the actual overall time to the sum of test
      times.

  Returns:
    A dictionary mapping test names to whether each one should be included,
    i.e. whether its running time fits below the threshold.

  Raises:
    Exception: The total running time of all the tests is below the threshold.
  """
  test_inclusions = {test: False for test in time_by_test.keys()}
  included = 0
  total_time = datetime.timedelta()
  output_path = os.path.join(OUT_DIR, 'included_tests.txt')
  adjusted_max_run_time_seconds = MAX_RUN_TIME.total_seconds() / overhead_ratio
  with open(output_path, 'w') as output:
    for (test, time) in sorted(
        time_by_test.iteritems(), key=operator.itemgetter(1)):
      if (total_time + time).total_seconds() <= adjusted_max_run_time_seconds:
        test_inclusions[test] = True
        included += 1
        total_time += time
        output.write('%s: %g ms -> %g ms\n' %
                     (test, time.total_seconds() * 1000.0,
                      total_time.total_seconds() * 1000.0))
      else:
        print('Can run fastest %d of %d tests in %g * %g = %g seconds' %
              (included, len(time_by_test), total_time.total_seconds(),
               overhead_ratio, total_time.total_seconds() * overhead_ratio))
        print('Wrote tests to include to ' + util.printable_path(output_path))
        return test_inclusions
  raise Exception('Apparently we can run all the tests? This smells wrong.')
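

# A hypothetical worked example of the budgeting above (all numbers invented):
# if the average overall run takes 1200 seconds while the fastest times of the
# individual tests sum to 400 seconds, the overhead ratio is 3.0. With
# MAX_RUN_TIME at 10 minutes, find_tests_to_run() then greedily admits the
# fastest tests for as long as their summed fastest times stay within
# 600 / 3.0 = 200 seconds.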


def build_test_tree(tests):
  """Builds a tree of tests.

  Args:
    tests: The list of tests to build into a tree. These can be packages,
      classes, or a mixture of the two.

  Returns:
    A dictionary mapping every test's ancestors to its children. The roots
    appear as children of None.
  """
  tree = collections.defaultdict(set)
  for test in tests:
    while True:
      parent = get_parent(test)
      tree[parent].add(test)
      if parent:
        test = parent
      else:
        break
  return tree
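

# Illustrative only (names invented): build_test_tree(['a.b.FooTest', 'a.c'])
# returns a mapping along the lines of
#
#   {None: {'a'}, 'a': {'a.b', 'a.c'}, 'a.b': {'a.b.FooTest'}}
#
# so the roots hang off None and every branch maps to its children.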


def build_exclusion_list(test_inclusions):
  """Builds a list of tests to exclude.

  Args:
    test_inclusions: A dictionary mapping test names to whether or not they
      should be included in the smoke tests. The names can be packages,
      classes, or a mixture of the two.

  Returns:
    A list of the exclusions. These could be individual tests, or packages or
    some part of the package hierarchy if they are to be entirely excluded.
  """
  tree = build_test_tree(test_inclusions.keys())
  exclusions = []

  # We do a DFS of the tree, rolling up exclusions as far as possible, i.e. if
  # an entire branch is excluded, we exclude that branch rather than all of its
  # leaves.

  def visit(test):
    """Visitor for a DFS of the tree.

    Args:
      test: The test to visit (either a class or package name).

    Returns:
      Whether or not the parent should include this node in the tree.

    Raises:
      Exception: The tree had an unexpected structure.
    """
    if test in test_inclusions:
      # We only expect to have inclusion status for the leaves of the tree.
      # Check that this is the case.
      if test in tree:
        raise Exception('The name %s is used for a leaf and a branch!' % test)
      # Return to the parent node whether this leaf is included.
      return test_inclusions[test]
    else:
      # We expect to have inclusion status for all leaves of the tree. Check
      # that this is the case.
      if test not in tree:
        raise Exception('Leaf %s has no inclusion status!' % test)
      # Look at all the children of this node.
      any_child_included = False
      child_exclusions = []
      for child in tree[test]:
        child_included = visit(child)
        if child_included:
          any_child_included = True
        else:
          child_exclusions.append(child)
      # If any children are included, we will count this node as included, so we
      # have to add any excluded children to the exclusion list.
      if any_child_included:
        for child in child_exclusions:
          exclusions.append(child)
      # Now return whether this node should be counted as included, which means
      # whether any children are included.
      return any_child_included

  # Start the DFS at each root of the tree.
  for root in tree[None]:
    root_included = visit(root)
    # If any of the roots are counted as excluded, add them to the list.
    if not root_included:
      exclusions.append(root)

  # Finally, sort and return the list of exclusions.
  return sorted(exclusions)


def self_test():
  """Self-test for the build_exclusion_list logic."""
  test_inclusions = {
      'a.x': True,
      'a.y': True,
      'b.x': True,
      'b.y': False,
      'c.x': False,
      'c.y': False,
      'd.x.m': True,
      'd.x.n': True,
      'd.y.m': True,
      'd.y.n': False,
      'd.z.m': False,
      'd.z.n': False,
  }
  expected_exclusions = [
      'b.y',
      'c',
      'd.y.n',
      'd.z',
  ]
  actual_exclusions = build_exclusion_list(test_inclusions)
  if actual_exclusions != expected_exclusions:
    raise Exception('Self test failed! Expected %s but got %s' %
                    (expected_exclusions, actual_exclusions))


def main():
  """The main method."""
  self_test()
  filenames = find_all_log_files()
  (times_by_test, overall_times) = collect_running_times(filenames)
  fastest_time_by_test = process_running_times(times_by_test)
  overhead_ratio = calculate_overhead_ratio(overall_times, fastest_time_by_test)
  if ROLL_UP_TEST_CLASSES_TO_PACKAGE:
    time_by_package = group_times_by_package(fastest_time_by_test)
    test_inclusions = find_tests_to_run(time_by_package, overhead_ratio)
  else:
    test_inclusions = find_tests_to_run(fastest_time_by_test, overhead_ratio)
  exclusions = build_exclusion_list(test_inclusions)
  output_path = os.path.join(LIBCORE_DIR, 'TEST_MAPPING')
  with open(output_path, 'w') as output:
    output.write(
        TEST_MAPPING_TEMPLATE.render(
            module=CTS_MODULE_NAME, exclusions=exclusions))
  print('Wrote test mapping to ' + util.printable_path(output_path))


main()