#!/usr/bin/env python2
#
# Copyright 2020 - The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tool to generate data for smoke tests."""

from __future__ import print_function

import collections
import datetime
import gzip
import operator
import os
import re
import tempfile

import jinja2
import util

ANDROID_REPOSITORY_ROOT = util.android_repository_root()
LIBCORE_DIR = os.path.join(ANDROID_REPOSITORY_ROOT, 'libcore')
LOGS_DIR = os.path.join(LIBCORE_DIR, 'smoketest')
REQUIRED_RUNS = 3
CTS_LOG_LINE_PATTERN = (r'(\d+)-(\d+) +(\d+):(\d+):(\d+)\.(\d+) +\d+ +\d+ +'
                        r'[^ ]+ (CtsTestRunListener|TestRunner) *: *(.+)')
THIS_YEAR = datetime.datetime.now().year
START_MATCHER = ('CtsTestRunListener', r'Now executing\s*:\s*(.+)')
TESTS_RAN_MATCHER = ('TestRunner', r'started\s*:.*')
END_MATCHER = ('CtsTestRunListener', r'Total memory\s*:.+')
HARD_END_MATCHER = ('TestRunner', r'run finished\s*:.+')
JAVA_CLASS_PATTERN = (r'(?:([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)\.)?'
                      r'[a-zA-Z_][a-zA-Z0-9_]*')
MAX_RUN_TIME = datetime.timedelta(minutes=10)
OUT_DIR = tempfile.mkdtemp()
ROLL_UP_TEST_CLASSES_TO_PACKAGE = False
CTS_MODULE_NAME = 'CtsLibcoreTestCases'

TEST_MAPPING_TEMPLATE = jinja2.Template("""
{
  "presubmit": [
    {
      "name": "{{module}}",
      "options": [{% for test in exclusions %}
        {
          "exclude-filter": "{{test}}"
        }{% if not loop.last %},{% endif %}{% endfor %}
      ]
    }
  ]
}
""".strip())


def find_all_log_files():
  """Returns a list of the log files to read."""
  if not os.path.isdir(LOGS_DIR):
    raise Exception('Could not find logs directory ' + LOGS_DIR)
  filenames = os.listdir(LOGS_DIR)
  if not filenames:
    raise Exception('Found empty logs directory ' + LOGS_DIR)
  if len(filenames) != REQUIRED_RUNS:
    raise Exception('Expected to find exactly %d files in %s, found %d' %
                    (REQUIRED_RUNS, LOGS_DIR, len(filenames)))
  return map(lambda f: os.path.join(LOGS_DIR, f), filenames)


def read_cts_logs(filename):
  """Read CTS entries from a log file.

  Args:
    filename: The name of the file to read.

  Yields:
    Tuples of timestamps (as datetimes), log sources, and log messages.
  """
  print('Reading ' + util.printable_path(filename))
  with gzip.open(filename, mode='rt') as log_file:
    for line in log_file:
      cts_match = re.match(CTS_LOG_LINE_PATTERN, line)
      if cts_match:
        assert len(cts_match.groups()) == 8
        timestamp = datetime.datetime(
            year=THIS_YEAR,
            month=int(cts_match.group(1)),
            day=int(cts_match.group(2)),
            hour=int(cts_match.group(3)),
            minute=int(cts_match.group(4)),
            second=int(cts_match.group(5)),
            microsecond=1000 * int(cts_match.group(6)))
        source = cts_match.group(7)
        message = cts_match.group(8)
        yield (timestamp, source, message)
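

# For illustration only: a hypothetical logcat line of the shape that
# CTS_LOG_LINE_PATTERN is intended to match (the field widths and values here
# are invented):
#
#   01-30 14:03:12.345  1234  5678 I CtsTestRunListener: Now executing : libcore.SomeTest
#
# For such a line, groups 1-6 are the timestamp fields (month, day, hour,
# minute, second, milliseconds), group 7 is the log tag ('CtsTestRunListener'
# or 'TestRunner') and group 8 is the message, which the parser then checks
# against START_MATCHER, TESTS_RAN_MATCHER, END_MATCHER and HARD_END_MATCHER.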


def match(matcher, got_source, message):
  """Check whether a log line matches requirements.

  Args:
    matcher: A pair of the required log source and a pattern to match messages.
    got_source: The actual log source.
    message: The actual message.

  Returns:
    A MatchObject from the pattern match against the message, or None.
  """
  (want_source, pattern) = matcher
  if got_source == want_source:
    return re.match(pattern, message)
  else:
    return None


def parse_running_times(filename):
  """Parse the running times from a log file.

  Args:
    filename: The name of the file to read.

  Yields:
    Pairs of test names and running times (as timedeltas). Also emits the
    overall elapsed time from the start of the first test to the end of the
    last test with the key 'OVERALL'.

  Raises:
    Exception: The log file entries didn't look like we expected.
  """
  executing = None
  start_timestamp = None
  ran_tests = False
  overall_start_timestamp = None
  overall_end_timestamp = None
  for (timestamp, source, message) in read_cts_logs(filename):
    start_match = match(START_MATCHER, source, message)
    tests_ran_match = match(TESTS_RAN_MATCHER, source, message)
    end_match = match(END_MATCHER, source, message)
    hard_end_match = match(HARD_END_MATCHER, source, message)
    if not executing:
      if start_match:
        assert len(start_match.groups()) == 1
        executing = start_match.group(1)
        start_timestamp = timestamp
        if not overall_start_timestamp:
          overall_start_timestamp = timestamp
    else:
      if start_match:
        raise Exception(
            'Found start for %s while waiting for end for %s at %s in %s' %
            (start_match.group(1), executing, str(timestamp), filename))
      if tests_ran_match:
        ran_tests = True
      if end_match or hard_end_match:
        running_time = timestamp - start_timestamp
        # We see two types of execution in the logs. One appears to be some kind
        # of dry run which doesn't actually run any tests (and completes
        # significantly faster). We want the one that actually runs tests.
        if ran_tests:
          yield (executing, running_time)
        executing = None
        start_timestamp = None
        ran_tests = False
        overall_end_timestamp = timestamp
  if executing:
    raise Exception('Reached EOF while waiting for end for %s in %s' %
                    (executing, filename))
  yield ('OVERALL', overall_end_timestamp - overall_start_timestamp)


def collect_running_times(filenames):
  """Collect running times from the log files.

  Args:
    filenames: The names of the files to read.

  Returns:
    A tuple containing (1) a dictionary mapping test names to lists of running
    times (as timedeltas), and (2) a list of overall running times (i.e.
    elapsed times from the start of the first test to the end of the last
    test).
  """
  times_by_test = collections.defaultdict(list)
  output_path = os.path.join(OUT_DIR, 'test_times.txt')
  overall_times = []
  with open(output_path, 'w') as output:
    for filename in filenames:
      for (test_name, time) in parse_running_times(filename):
        output.write('%s: %g ms\n' % (test_name, time.total_seconds() * 1000))
        if test_name == 'OVERALL':
          overall_times.append(time)
        else:
          times_by_test[test_name].append(time)
  print('Wrote test times to ' + util.printable_path(output_path))
  return (times_by_test, overall_times)


def process_running_times(times_by_test):
  """Processes the collected running times.

  Args:
    times_by_test: A dictionary mapping test names to lists of running times.

  Returns:
    A dictionary mapping test names to fastest running times.
  """
  for (test, times) in times_by_test.iteritems():
    if len(times) != REQUIRED_RUNS:
      print('Warning: Only found %d runs for %s' % (len(times), test))
  return {test: min(times) for (test, times) in times_by_test.iteritems()}
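

# Purely illustrative (test names and durations are invented): with
# REQUIRED_RUNS = 3, the collected and processed data look roughly like
#
#   times_by_test = {
#       'libcore.java.util.ExampleTest': [timedelta(seconds=9),
#                                         timedelta(seconds=11),
#                                         timedelta(seconds=10)],
#       ...
#   }
#   overall_times = [timedelta(minutes=40), timedelta(minutes=42),
#                    timedelta(minutes=41)]
#   fastest_time_by_test = {'libcore.java.util.ExampleTest':
#                               timedelta(seconds=9), ...}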


def calculate_overhead_ratio(overall_times, fastest_time_by_test):
  """Calculates a ratio for the actual overall time to the sum of test times.

  The actual overall times are the elapsed times from the start of the first
  test to the end of the last test. The average of these is used. The ratio is
  taken with the sum of the fastest time for each test (which is what will be
  used for scoring).

  Args:
    overall_times: A list of overall running times.
    fastest_time_by_test: A dictionary mapping test names to fastest running
      times.

  Returns:
    The ratio.
  """
  average_overall_time = sum(overall_times,
                             datetime.timedelta(0)) / len(overall_times)
  total_time_by_test = sum(fastest_time_by_test.values(), datetime.timedelta(0))
  ratio = (
      average_overall_time.total_seconds() / total_time_by_test.total_seconds())
  print(
      'Average time for run is %g seconds, sum of fastest test times is %g, ratio is %g'
      % (average_overall_time.total_seconds(),
         total_time_by_test.total_seconds(), ratio))
  # N.B. Possible future enhancement: Currently, because we take the fastest of
  # three runs, a real run will always be slightly slower than we predict. We
  # could multiply in another overhead factor to account for this, e.g. by
  # looking at the ratio of the mean of the three to the fastest of the three.
  # (This factor should be applied globally rather than individually to each
  # test so as not to penalize tests which happened to have a slow run or two.)
  # This is not a high priority since for now we can just set MAX_RUN_TIME a bit
  # low to allow for this.
  return ratio


def get_parent(name):
  """Returns the parent of a Java class or package name."""
  class_match = re.match(JAVA_CLASS_PATTERN, name)
  if not class_match:
    raise Exception('Could not parse Java class name ' + name)
  assert len(class_match.groups()) == 1
  return class_match.group(1)


def group_times_by_package(times_by_test):
  """Groups the test classes by package name, summing the times.

  Args:
    times_by_test: A dictionary mapping test names to fastest running times.

  Returns:
    A dictionary mapping package names to total fastest running times.
  """
  time_by_package = collections.defaultdict(datetime.timedelta)
  for (clazz, time) in times_by_test.iteritems():
    package = get_parent(clazz)
    if package:  # A few tests have no package. They're weird, let's skip them.
      time_by_package[package] = time_by_package[package] + time
  output_path = os.path.join(OUT_DIR, 'package_times.txt')
  with open(output_path, 'w') as output:
    for (package, time) in time_by_package.iteritems():
      output.write('%s: %s ms\n' % (package, time.total_seconds() * 1000.0))
  print('Wrote package times to ' + util.printable_path(output_path))
  return time_by_package


def find_tests_to_run(time_by_test, overhead_ratio):
  """Finds the tests to actually run.

  The tests chosen will be the fastest set such that their total time, when
  multiplied by the overhead ratio, is less than the maximum.

  Args:
    time_by_test: A dictionary mapping test names to total fastest running
      times. The names can be packages, classes, or a mixture of the two.
    overhead_ratio: A ratio for the actual overall time to the sum of test
      times.

  Returns:
    A dictionary mapping test names to whether each one should be included,
    i.e. whether its running time fits below the threshold.

  Raises:
    Exception: The total running time of all the tests is below the threshold.
  """
  test_inclusions = {test: False for test in time_by_test.keys()}
  included = 0
  total_time = datetime.timedelta()
  output_path = os.path.join(OUT_DIR, 'included_tests.txt')
  adjusted_max_run_time_seconds = MAX_RUN_TIME.total_seconds() / overhead_ratio
  with open(output_path, 'w') as output:
    for (test, time) in sorted(
        time_by_test.iteritems(), key=operator.itemgetter(1)):
      if (total_time + time).total_seconds() <= adjusted_max_run_time_seconds:
        test_inclusions[test] = True
        included += 1
        total_time += time
        output.write('%s: %g ms -> %g ms\n' %
                     (test, time.total_seconds() * 1000.0,
                      total_time.total_seconds() * 1000.0))
      else:
        print('Can run fastest %d of %d tests in %g * %g = %g seconds' %
              (included, len(time_by_test), total_time.total_seconds(),
               overhead_ratio, total_time.total_seconds() * overhead_ratio))
        print('Wrote tests to include to ' + util.printable_path(output_path))
        return test_inclusions
  raise Exception('Apparently we can run all the tests? This smells wrong.')
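

# A hypothetical worked example of the budgeting above (all numbers invented):
# if the average overall run takes 1200 seconds while the fastest times of the
# individual tests sum to 400 seconds, the overhead ratio is 3.0. With
# MAX_RUN_TIME at 10 minutes, find_tests_to_run() then greedily admits the
# fastest tests for as long as their summed fastest times stay within
# 600 / 3.0 = 200 seconds.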


def build_test_tree(tests):
  """Builds a tree of tests.

  Args:
    tests: The list of tests to build into a tree. These can be packages,
      classes, or a mixture of the two.

  Returns:
    A dictionary mapping every test's ancestors to its children. The roots
    appear as children of None.
  """
  tree = collections.defaultdict(set)
  for test in tests:
    while True:
      parent = get_parent(test)
      tree[parent].add(test)
      if parent:
        test = parent
      else:
        break
  return tree
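

# Illustrative only (names invented): build_test_tree(['a.b.FooTest', 'a.c'])
# returns a mapping along the lines of
#
#   {None: {'a'}, 'a': {'a.b', 'a.c'}, 'a.b': {'a.b.FooTest'}}
#
# so the roots hang off None and every branch maps to its children.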


def build_exclusion_list(test_inclusions):
  """Builds a list of tests to exclude.

  Args:
    test_inclusions: A dictionary mapping test names to whether or not they
      should be included in the smoke tests. The names can be packages,
      classes, or a mixture of the two.

  Returns:
    A list of the exclusions. These could be individual tests, or packages or
    some part of the package hierarchy if they are to be entirely excluded.
  """
  tree = build_test_tree(test_inclusions.keys())
  exclusions = []

  # We do a DFS of the tree, rolling up exclusions as far as possible, i.e. if
  # an entire branch is excluded, we exclude that branch rather than all of its
  # leaves.

  def visit(test):
    """Visitor for a DFS of the tree.

    Args:
      test: The test to visit (either a class or package name).

    Returns:
      Whether or not the parent should include this node in the tree.

    Raises:
      Exception: The tree had an unexpected structure.
    """
    if test in test_inclusions:
      # We only expect to have inclusion status for the leaves of the tree.
      # Check that this is the case.
      if test in tree:
        raise Exception('The name %s is used for a leaf and a branch!' % test)
      # Return to the parent node whether this leaf is included.
      return test_inclusions[test]
    else:
      # We expect to have inclusion status for all leaves of the tree. Check
      # that this is the case.
      if test not in tree:
        raise Exception('Leaf %s has no inclusion status!' % test)
      # Look at all the children of this node.
      any_child_included = False
      child_exclusions = []
      for child in tree[test]:
        child_included = visit(child)
        if child_included:
          any_child_included = True
        else:
          child_exclusions.append(child)
      # If any children are included, we will count this node as included, so we
      # have to add any excluded children to the exclusion list.
      if any_child_included:
        for child in child_exclusions:
          exclusions.append(child)
      # Now return whether this node should be counted as included, which means
      # whether any children are included.
      return any_child_included

  # Start the DFS at each root of the tree.
  for root in tree[None]:
    root_included = visit(root)
    # If any of the roots are counted as excluded, add them to the list.
    if not root_included:
      exclusions.append(root)

  # Finally, sort and return the list of exclusions.
  return sorted(exclusions)


def self_test():
  """Self-test for the build_exclusion_list logic."""
  test_inclusions = {
      'a.x': True,
      'a.y': True,
      'b.x': True,
      'b.y': False,
      'c.x': False,
      'c.y': False,
      'd.x.m': True,
      'd.x.n': True,
      'd.y.m': True,
      'd.y.n': False,
      'd.z.m': False,
      'd.z.n': False,
  }
  expected_exclusions = [
      'b.y',
      'c',
      'd.y.n',
      'd.z',
  ]
  actual_exclusions = build_exclusion_list(test_inclusions)
  if actual_exclusions != expected_exclusions:
    raise Exception('Self test failed! Expected %s but got %s' %
                    (expected_exclusions, actual_exclusions))


def main():
  """The main method."""
  self_test()
  filenames = find_all_log_files()
  (times_by_test, overall_times) = collect_running_times(filenames)
  fastest_time_by_test = process_running_times(times_by_test)
  overhead_ratio = calculate_overhead_ratio(overall_times, fastest_time_by_test)
  if ROLL_UP_TEST_CLASSES_TO_PACKAGE:
    time_by_package = group_times_by_package(fastest_time_by_test)
    test_inclusions = find_tests_to_run(time_by_package, overhead_ratio)
  else:
    test_inclusions = find_tests_to_run(fastest_time_by_test, overhead_ratio)
  exclusions = build_exclusion_list(test_inclusions)
  output_path = os.path.join(LIBCORE_DIR, 'TEST_MAPPING')
  with open(output_path, 'w') as output:
    output.write(
        TEST_MAPPING_TEMPLATE.render(
            module=CTS_MODULE_NAME, exclusions=exclusions))
  print('Wrote test mapping to ' + util.printable_path(output_path))


main()