1#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Outputs quantitative information about Address Sanitizer traces."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23from collections import Counter
24from datetime import datetime
25import argparse
26import bisect
27import os
28import re
29
30
def find_match(list_substrings, big_string):
    """Returns the index of the first substring found inside big_string.

    Substrings are tried in list order and the first hit wins. When none of
    them occurs in big_string, the position of the literal "Uncategorized"
    entry is returned instead; list.index raises ValueError if that entry is
    not in list_substrings.
    """
    matching_positions = (position
                          for position, needle in enumerate(list_substrings)
                          if needle in big_string)
    first_match = next(matching_positions, None)
    if first_match is not None:
        return first_match
    # Nothing matched, fall back to the catch-all category
    return list_substrings.index("Uncategorized")
37
38
def absolute_to_relative(data_lists, symbol_traces):
    """Address changed to Dex File offset and shifting time to 0 min in ms.

    Every entry of data_lists["plot_list"] is rewritten in place. On entry an
    element is [time string, hexadecimal address string]. On exit it is
    either [elapsed ms since the first access, Dex File offset, category,
    address string] or, when the access is filtered out, four None values.

    An access is filtered out when
      * its address lies below the lowest Dex File starting address,
      * its Dex File offset is outside [offsets[0], min(offsets[1], size)),
      * its elapsed time is outside [times[0], times[1]).
    For filtered accesses the matching entries of symbol_traces and
    data_lists["cat_list"] are set to None as well.

    Args:
        data_lists: dictionary holding "plot_list", "dex_ends_list" (sorted
            (start address, size) pairs), "cat_list", "offsets" and "times"
            (both sorted [low, high] pairs).
        symbol_traces: list with one trace per plot_list entry; modified in
            place.

    Raises:
        ValueError: if there are accesses to convert but no Dex File
            starting addresses, or if a time or address cannot be parsed.
    """
    plot_list = data_lists["plot_list"]
    if not plot_list:
        # No accesses at all, so there is no first access to measure from
        return

    dex_ends_list = data_lists["dex_ends_list"]
    if not dex_ends_list:
        raise ValueError("no Dex File starting addresses available to "
                         "compute Dex File offsets")

    offsets = data_lists["offsets"]
    time_offsets = data_lists["times"]
    cat_list = data_lists["cat_list"]
    # The Dex File table does not change while iterating, so split it into
    # parallel tuples once instead of once per access
    dex_start_list, dex_size_list = zip(*dex_ends_list)

    # Format of time provided by logcat
    time_format_str = "%H:%M:%S.%f"
    first_access_time = datetime.strptime(plot_list[0][0], time_format_str)
    for ind, elem in enumerate(plot_list):
        elem_date_time = datetime.strptime(elem[0], time_format_str)
        # Shift time values so that first access is at time 0 milliseconds
        elem[0] = int((elem_date_time - first_access_time).total_seconds() *
                      1000)
        address_access = int(elem[1], 16)
        # For each poisoned address, find highest Dex File starting address
        # less than address_access
        dex_file_ind = bisect.bisect(dex_start_list, address_access) - 1

        keep_access = False
        # An index of -1 means the address precedes every Dex File; using it
        # would silently wrap around to the last Dex File
        if dex_file_ind >= 0:
            dex_offset = address_access - dex_start_list[dex_file_ind]
            # Assumes that offsets is already sorted and constrains offset
            # to be within range of the dex_file
            max_offset = min(offsets[1], dex_size_list[dex_file_ind])
            keep_access = (offsets[0] <= dex_offset < max_offset and
                           time_offsets[0] <= elem[0] < time_offsets[1])

        if keep_access:
            elem.insert(1, dex_offset)
            # Category that a data point belongs to
            elem.insert(2, cat_list[ind])
        else:
            # Meant to nullify data that does not meet the criteria
            elem[:] = 4 * [None]
            symbol_traces[ind] = None
            cat_list[ind] = None
74
75
def print_category_info(cat_split, outname, out_dir_name, title):
    """Prints a summary of the given traces and writes them to a file.

    Identical traces are grouped and counted. The summary printed to stdout
    holds the title, the number of distinct traces, the total number of
    traces and a histogram of how many distinct traces share each count.
    The file out_dir_name/outname then receives every distinct trace,
    most frequent first, preceded by its number of appearances.
    """
    ranked_traces = Counter(cat_split).most_common()
    appearance_counts = [appearances for _, appearances in ranked_traces]

    print("-" * 53)
    print(title)
    print("\tNumber of distinct traces: " + str(len(ranked_traces)))
    print("\tSum of trace counts: " + str(sum(appearance_counts)))
    print("\n\tCount: How many traces appeared with count\n\t", end="")
    print(Counter(appearance_counts))

    report_path = os.path.join(out_dir_name, outname)
    with open(report_path, "w") as output_file:
        for trace_text, appearances in ranked_traces:
            heading = ("\n\nNumber of times appeared: " +
                       str(appearances) +
                       "\n")
            output_file.write(heading)
            output_file.write(trace_text.strip())
94
95
def print_categories(categories, symbol_file_split, out_dir_name):
    """Prints details of all categories.

    Traces that are None are ignored. For every keyword after the first
    entry of categories, two reports are produced through
    print_category_info: one for the traces containing the keyword and one
    for the traces that do not. A report over all remaining traces follows,
    and, when at least one keyword was given, a report over the traces that
    contain none of the entries of categories.
    """
    kept_traces = [trace for trace in symbol_file_split
                   if trace is not None]

    # Info of traces containing a call to current category
    for cat_num, keyword in enumerate(categories[1:], 1):
        print("\nCategory #%d" % cat_num)
        with_keyword = []
        without_keyword = []
        for trace in kept_traces:
            if keyword in trace:
                with_keyword.append(trace)
            else:
                without_keyword.append(trace)
        base_file_name = keyword.lower() + "cat_output"
        print_category_info(with_keyword, base_file_name, out_dir_name,
                            "Traces containing: " + keyword)
        print_category_info(without_keyword, "non" + base_file_name,
                            out_dir_name,
                            "Traces not containing: " + keyword)

    # All traces (including uncategorized) together
    print_category_info(kept_traces, "allcat_output", out_dir_name,
                        "All traces together:")

    # Traces containing none of keywords
    # Only used if categories are passed in
    if len(categories) > 1:
        unmatched_traces = [trace for trace in kept_traces
                            if not any(keyword in trace
                                       for keyword in categories)]
        print_category_info(unmatched_traces, "noncat_output", out_dir_name,
                            "Uncategorized calls")
128
129
def is_directory(path_name):
    """Returns path_name unchanged if it names an existing directory.

    Intended as an argparse type callback: any other path raises
    argparse.ArgumentTypeError with a message naming the path.
    """
    if os.path.isdir(path_name):
        return path_name
    raise argparse.ArgumentTypeError("%s is not a directory" % path_name)
136
137
def parse_args(argv):
    """Parses arguments passed in.

    Args:
        argv: list of argument strings handed to
            ArgumentParser.parse_args; None makes argparse read the
            process command line.

    Returns:
        The argparse namespace with out_dir_name, dex_file, offsets, times,
        sanitizer_trace, symbol_trace, dex_starts and categories. The file
        arguments are opened for reading by argparse.FileType.
    """
    parser = argparse.ArgumentParser()
    # argparse runs string defaults through the type callback, so the
    # default has to be a path is_directory accepts. An empty string is
    # rejected by os.path.isdir and made -d mandatory in practice; the
    # current directory is the location an empty prefix would resolve to.
    parser.add_argument("-d", action="store",
                        default=os.curdir, dest="out_dir_name",
                        type=is_directory,
                        help="Output Directory")
    parser.add_argument("--dex-file", action="store",
                        default=None, dest="dex_file",
                        type=argparse.FileType("r"),
                        help="Baksmali Dex File Dump")
    parser.add_argument("--offsets", action="store", nargs=2,
                        default=[float(0), float("inf")],
                        dest="offsets",
                        metavar="OFFSET",
                        type=float,
                        help="Filters out accesses not between provided"
                             " offsets if provided. Can provide 'inf'"
                             " for infinity")
    parser.add_argument("--times", action="store", nargs=2,
                        default=[float(0), float("inf")],
                        dest="times",
                        metavar="TIME",
                        type=float,
                        help="Filters out accesses not between provided"
                             " time offsets if provided. Can provide 'inf'"
                             " for infinity")
    parser.add_argument("sanitizer_trace", action="store",
                        type=argparse.FileType("r"),
                        help="File containing sanitizer traces filtered by "
                             "prune_sanitizer_output.py")
    parser.add_argument("symbol_trace", action="store",
                        type=argparse.FileType("r"),
                        help="File containing symbolized traces that match "
                             "sanitizer_trace")
    parser.add_argument("dex_starts", action="store",
                        type=argparse.FileType("r"),
                        help="File containing starting addresses of Dex Files")
    parser.add_argument("categories", action="store", nargs="*",
                        help="Keywords expected to show in large amounts of"
                             " symbolized traces")

    return parser.parse_args(argv)
180
181
def get_dex_offset_data(line, dex_file_item):
    """Returns a tuple of dex file offset, item name, and data of a line.

    The offset is the hexadecimal text in front of the first ":" of line.
    The data is the stripped text between the first and second "|". The
    result has the shape (offset, (dex_file_item, data)).
    """
    dex_offset = int(line[:line.find(":")], 16)
    line_data = line.split("|")[1].strip()
    return dex_offset, (dex_file_item, line_data)
188
189
def read_data(parsed_argv):
    """Reads data from filepath arguments and parses them into lists.

    Args:
        parsed_argv: namespace produced by parse_args. Its open file
            objects are read completely and closed here. Its categories,
            offsets and times lists are modified in place ("Uncategorized"
            is inserted in front of categories, the other two are sorted).

    Returns:
        A tuple (data_lists, categories, symbol_file_split) where
        data_lists is a dictionary with the keys
          "offsets", "times": sorted [low, high] filter bounds,
          "plot_list": one list per "use-after-poison"/"unknown-crash" line
              holding that line's whitespace-separated fields 1 and 11,
          "cat_list": category name chosen for each symbolized trace,
          "dex_ends_list": sorted (field 9 parsed as hex, field 12 parsed
              as decimal) pairs of every "RegisterDexFile" line,
          "offset_list", "offset_data": only when a dex file dump was
              given; parallel tuples of offsets and (item, data) pairs.
        symbol_file_split holds the text following each "Stack Trace"
        marker of the symbolized trace file.

    Raises:
        ValueError: if a dex file dump was given but none of its lines
            could be recognised as an offset line.
    """
    # Using a dictionary to establish relation between lists added
    data_lists = {}
    categories = parsed_argv.categories
    # Makes sure each trace maps to some category
    categories.insert(0, "Uncategorized")

    data_lists["offsets"] = parsed_argv.offsets
    data_lists["offsets"].sort()

    data_lists["times"] = parsed_argv.times
    data_lists["times"].sort()

    # The with blocks close each file even if reading it fails
    with parsed_argv.sanitizer_trace as sanitizer_file:
        logcat_file_data = sanitizer_file.readlines()

    with parsed_argv.symbol_trace as symbol_file:
        # Removes text before first trace
        symbol_file_split = symbol_file.read().split("Stack Trace")[1:]

    with parsed_argv.dex_starts as dex_starts_file:
        dex_start_file_data = dex_starts_file.readlines()

    if parsed_argv.dex_file is not None:
        with parsed_argv.dex_file as dex_file:
            dex_file_data = dex_file.read()
        # Splits baksmali dump by each item
        item_split = [s.splitlines() for s in re.split(r"\|\[[0-9]+\] ",
                                                       dex_file_data)]
        offset_line = re.compile("[0-9a-f]{6}:")
        # Splits each item by line and collects, for every line that looks
        # like an offset line, its offset and the data associated with it
        dex_entries = [get_dex_offset_data(line, item[0])
                       for item in item_split
                       for line in item[1:]
                       if offset_line.search(line) is not None and
                       "|" in line]
        if not dex_entries:
            # Unpacking an empty zip would fail with an unrelated message
            raise ValueError("dex file dump contains no offset lines")
        offset_list, offset_data = zip(*dex_entries)
        data_lists["offset_list"] = offset_list
        data_lists["offset_data"] = offset_data

    # Each element is a list of time and address accessed
    data_lists["plot_list"] = [[field
                                for position, field in enumerate(line.split())
                                if position in (1, 11)]
                               for line in logcat_file_data
                               if "use-after-poison" in line or
                               "unknown-crash" in line]
    # Contains a mapping between traces and the category they belong to
    # based on arguments
    data_lists["cat_list"] = [categories[find_match(categories, trace)]
                              for trace in symbol_file_split]

    # Contains a list of starting address of all dex files to calculate dex
    # offsets
    dex_ends_list = []
    for line in dex_start_file_data:
        if "RegisterDexFile" in line:
            fields = line.split()
            dex_ends_list.append((int(fields[9], 16), int(fields[12])))
    # Dex File Starting addresses must be sorted because bisect requires sorted
    # lists.
    dex_ends_list.sort()
    data_lists["dex_ends_list"] = dex_ends_list

    return data_lists, categories, symbol_file_split
260
261
def main():
    """Takes in trace information and outputs details about them.

    Parses the command line, loads and filters the trace data, then writes
    one "time_output_<index>.dat" file per category into the output
    directory. Each file lists the data points of that category; when a dex
    file dump was loaded, every point is also annotated with the dump entry
    found for its offset and whether the offset differs from that entry's
    offset. Finally the per-category trace reports are produced.
    """
    parsed_argv = parse_args(None)
    data_lists, categories, symbol_file_split = read_data(parsed_argv)

    # Formats plot_list such that each element is a data point
    absolute_to_relative(data_lists, symbol_file_split)

    column_header = ("# Time, Dex File Offset_10, Dex File Offset_16,"
                     " Address, Item Accessed, Item Member Accessed"
                     " Unaligned\n")
    # Only present when a dex file dump was passed in
    has_dex_dump = "offset_list" in data_lists

    for file_ext, cat_name in enumerate(categories):
        dat_name = "time_output_" + str(file_ext) + ".dat"
        out_file_name = os.path.join(parsed_argv.out_dir_name, dat_name)
        with open(out_file_name, "w") as output_file:
            output_file.write("# Category: " + cat_name + "\n")
            output_file.write(column_header)
            for data_point in data_lists["plot_list"]:
                elapsed_ms, dex_offset, category, address = data_point
                # Filtered points carry None and never match a category
                if category != cat_name:
                    continue
                point_fields = [str(elapsed_ms),
                                str(dex_offset) + " #" + hex(dex_offset),
                                str(address)]
                output_file.write(" ".join(point_fields))
                if has_dex_dump:
                    known_offsets = data_lists["offset_list"]
                    # Last dump entry whose offset is not above dex_offset
                    entry_index = bisect.bisect(known_offsets,
                                                dex_offset) - 1
                    aligned_dex_offset = known_offsets[entry_index]
                    entry_data = data_lists["offset_data"][entry_index]
                    is_unaligned = aligned_dex_offset != dex_offset
                    output_file.write(" " +
                                      "|".join(entry_data) +
                                      " " +
                                      str(is_unaligned))
                output_file.write("\n")
    print_categories(categories, symbol_file_split, parsed_argv.out_dir_name)


if __name__ == "__main__":
    main()
307