1#!/usr/bin/env python
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""annotate.py: annotate source files based on perf.data.
19"""
20
21
22import argparse
23import os
24import os.path
25import shutil
26
27from simpleperf_report_lib import ReportLib
28from utils import log_info, log_warning, log_exit
29from utils import Addr2Nearestline, extant_dir, flatten_arg_list, is_windows, SourceFileSearcher
30
class SourceLine(object):
    """A source location: a source file, a function name and a line number.

    The *_key properties return hashable values identifying the file, the
    (file, function) pair and the (file, line) pair of this location.
    """

    def __init__(self, file_id, function, line):
        self.file, self.function, self.line = file_id, function, line

    @property
    def file_key(self):
        """Key identifying the source file."""
        return self.file

    @property
    def function_key(self):
        """Key identifying the function together with its source file."""
        key = (self.file, self.function)
        return key

    @property
    def line_key(self):
        """Key identifying the line together with its source file."""
        key = (self.file, self.line)
        return key
48
49
class Addr2Line(object):
    """Map [dso_name, vaddr] pairs to lists of SourceLine.

    Address conversion is delegated to Addr2Nearestline. Source file names
    it reports are replaced by the path returned from SourceFileSearcher
    whenever that lookup returns a non-empty result.
    """

    def __init__(self, ndk_path, binary_cache_path, source_dirs):
        self.addr2line = Addr2Nearestline(ndk_path, binary_cache_path, True)
        self.source_searcher = SourceFileSearcher(source_dirs)

    def add_addr(self, dso_path, func_addr, addr):
        """Register an address of dso_path to be converted later."""
        self.addr2line.add_addr(dso_path, func_addr, addr)

    def convert_addrs_to_lines(self):
        """Convert all registered addresses to source line information."""
        self.addr2line.convert_addrs_to_lines()

    def get_sources(self, dso_path, addr):
        """Return the SourceLine list for addr in dso_path.

        The list is empty when the dso is unknown or the address has no
        source information.
        """
        dso = self.addr2line.get_dso(dso_path)
        entries = self.addr2line.get_addr_source(dso, addr) if dso else None
        if not entries:
            return []
        resolve = self.source_searcher.get_real_path
        # Fall back to the reported file name when no real path is found.
        return [SourceLine(resolve(file_name) or file_name, function_name, line_number)
                for (file_name, line_number, function_name) in entries]
77
78
class Period(object):
    """Event count information for a line, a function, a source file or a
       binary.

    Attributes:
        period: the event count that occurred when running the entity itself.
        acc_period: the accumulated event count that occurred when running
            the entity and the functions called by it.
    """

    def __init__(self, period=0, acc_period=0):
        self.period, self.acc_period = period, acc_period

    def __iadd__(self, other):
        """Add the counts of other to this object in place; return self."""
        self.period = self.period + other.period
        self.acc_period = self.acc_period + other.acc_period
        return self
97
98
class DsoPeriod(object):
    """Event counts accumulated for one shared library."""

    def __init__(self, dso_name):
        # Counts start at zero; they grow through add_period().
        self.period = Period()
        self.dso_name = dso_name

    def add_period(self, period):
        """Accumulate period into the total of this shared library."""
        total = self.period
        total += period
        self.period = total
108
109
class FilePeriod(object):
    """Event counts accumulated for one source file, its lines and functions."""

    def __init__(self, file_id):
        self.file = file_id
        self.period = Period()
        # Maps a line number to the Period of that line.
        self.line_dict = {}
        # Maps a function name to [function start line, Period].
        self.function_dict = {}

    def add_period(self, period):
        """Accumulate period into the total of the whole file."""
        self.period += period

    def add_line_period(self, line, period):
        """Accumulate period into the entry of line, creating it if needed."""
        if line not in self.line_dict:
            self.line_dict[line] = Period()
        self.line_dict[line] += period

    def add_function_period(self, function_name, function_start_line, period):
        """Accumulate period into the entry of function_name.

        When the entry is created, a start line of None is stored as -1.
        The start line of an existing entry is left unchanged.
        """
        if function_name not in self.function_dict:
            start_line = -1 if function_start_line is None else function_start_line
            self.function_dict[function_name] = [start_line, Period()]
        self.function_dict[function_name][1] += period
139
140
class SourceFileAnnotator(object):
    """Annotate source files with event counts read from perf.data files.

    The profiling data is read twice: the first pass collects the addresses
    to convert to source lines, the second pass accumulates event counts
    (Period) for binaries, source files, functions and lines. The result is
    a summary file and annotated copies of the source files.
    """

    def __init__(self, config):
        """Check config, set up the filters and recreate the output directory.

        Args:
            config: dict which must contain the keys perf_data_list,
                source_dirs, comm_filters, pid_filters, tid_filters,
                dso_filters and ndk_path. The key annotate_dest_dir is
                set on it here.
        """
        # Every key must be present, even when its value is empty.
        config_names = ['perf_data_list', 'source_dirs', 'comm_filters',
                        'pid_filters', 'tid_filters', 'dso_filters', 'ndk_path']
        for name in config_names:
            if name not in config:
                log_exit('config [%s] is missing' % name)
        # The binary cache and its kallsyms file are only used when present.
        symfs_dir = 'binary_cache'
        if not os.path.isdir(symfs_dir):
            symfs_dir = None
        kallsyms = 'binary_cache/kallsyms'
        if not os.path.isfile(kallsyms):
            kallsyms = None

        self.config = config
        self.symfs_dir = symfs_dir
        self.kallsyms = kallsyms
        # A filter left as None accepts everything.
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

        # Always start from an empty output directory.
        config['annotate_dest_dir'] = 'annotated_files'
        output_dir = config['annotate_dest_dir']
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        self.addr2line = Addr2Line(self.config['ndk_path'], symfs_dir, config.get('source_dirs'))
        # Total event count of all samples used.
        self.period = 0
        # Maps a dso name to its DsoPeriod.
        self.dso_periods = {}
        # Maps a source file to its FilePeriod.
        self.file_periods = {}

    def annotate(self):
        """Run all annotation steps in order."""
        self._collect_addrs()
        self._convert_addrs_to_lines()
        self._generate_periods()
        self._write_summary()
        self._annotate_files()

    def _setup_report_lib(self, lib, perf_data):
        """Point lib at perf_data and at the binary cache, when available."""
        lib.SetRecordFile(perf_data)
        if self.symfs_dir:
            lib.SetSymfs(self.symfs_dir)
        if self.kallsyms:
            lib.SetKallsymsFile(self.kallsyms)

    @staticmethod
    def _get_sample_symbols(lib):
        """Return the symbol of the current sample followed by the symbols
           of its callchain entries.
        """
        symbols = [lib.GetSymbolOfCurrentSample()]
        callchain = lib.GetCallChainOfCurrentSample()
        symbols.extend(callchain.entries[i].symbol for i in range(callchain.nr))
        return symbols

    def _collect_addrs(self):
        """Read perf.data, collect all addresses we need to convert to
           source file:line.

        For every symbol accepted by the filters two addresses are added:
        the address hit by the symbol and the start address of its function.
        """
        for perf_data in self.config['perf_data_list']:
            lib = ReportLib()
            # Close lib even when reading fails part-way, so that it is
            # never left open.
            try:
                self._setup_report_lib(lib, perf_data)
                while True:
                    sample = lib.GetNextSample()
                    if sample is None:
                        break
                    if not self._filter_sample(sample):
                        continue
                    for symbol in self._get_sample_symbols(lib):
                        if self._filter_symbol(symbol):
                            self.addr2line.add_addr(symbol.dso_name, symbol.symbol_addr,
                                                    symbol.vaddr_in_file)
                            self.addr2line.add_addr(symbol.dso_name, symbol.symbol_addr,
                                                    symbol.symbol_addr)
            finally:
                lib.Close()

    def _filter_sample(self, sample):
        """Return true if the sample can be used.

        A sample is rejected when a comm, pid or tid filter is set and the
        sample's value is not in it.
        """
        if self.comm_filter and sample.thread_comm not in self.comm_filter:
            return False
        if self.pid_filter and sample.pid not in self.pid_filter:
            return False
        if self.tid_filter and sample.tid not in self.tid_filter:
            return False
        return True

    def _filter_symbol(self, symbol):
        """Return true if no dso filter is set or the symbol's dso is in it."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def _convert_addrs_to_lines(self):
        """Ask self.addr2line to convert the collected addresses."""
        self.addr2line.convert_addrs_to_lines()

    def _generate_periods(self):
        """Read perf.data, collect Period for all types:
            binaries, source files, functions, lines.
        """
        for perf_data in self.config['perf_data_list']:
            lib = ReportLib()
            # Close lib even when reading fails part-way, so that it is
            # never left open.
            try:
                self._setup_report_lib(lib, perf_data)
                while True:
                    sample = lib.GetNextSample()
                    if sample is None:
                        break
                    if not self._filter_sample(sample):
                        continue
                    self._generate_periods_for_sample(lib, sample)
            finally:
                lib.Close()

    def _generate_periods_for_sample(self, lib, sample):
        """Add the event count of one sample to dsos, files, lines and functions.

        The first symbol (the one of the sample itself) contributes to both
        period and acc_period; the symbols from the callchain contribute to
        acc_period only.
        """
        symbols = self._get_sample_symbols(lib)
        # Each sample has a callchain, but its period is only used once
        # to add period for each function/source_line/source_file/binary.
        # For example, if more than one entry in the callchain hits a
        # function, the event count of that function is only increased once.
        # Otherwise, we may get periods > 100%.
        is_sample_used = False
        used_dso_dict = {}
        used_file_dict = {}
        used_function_dict = {}
        used_line_dict = {}
        self_period = Period(sample.period, sample.period)
        callchain_period = Period(0, sample.period)
        for j, symbol in enumerate(symbols):
            if not self._filter_symbol(symbol):
                continue
            is_sample_used = True
            period = self_period if j == 0 else callchain_period
            # Add period to dso.
            self._add_dso_period(symbol.dso_name, period, used_dso_dict)
            # Add period to source file and line, using the address hit.
            for source in self.addr2line.get_sources(symbol.dso_name, symbol.vaddr_in_file):
                if not source.file:
                    continue
                self._add_file_period(source, period, used_file_dict)
                if source.line:
                    self._add_line_period(source, period, used_line_dict)
            # Add period to function, using the start address of the function.
            for source in self.addr2line.get_sources(symbol.dso_name, symbol.symbol_addr):
                if not source.file:
                    continue
                self._add_file_period(source, period, used_file_dict)
                if source.function:
                    self._add_function_period(source, period, used_function_dict)

        if is_sample_used:
            self.period += sample.period

    def _add_dso_period(self, dso_name, period, used_dso_dict):
        """Add period to dso_name unless used_dso_dict already contains it."""
        if dso_name in used_dso_dict:
            return
        used_dso_dict[dso_name] = True
        dso_period = self.dso_periods.get(dso_name)
        if dso_period is None:
            dso_period = self.dso_periods[dso_name] = DsoPeriod(dso_name)
        dso_period.add_period(period)

    def _add_file_period(self, source, period, used_file_dict):
        """Add period to the file of source unless used_file_dict contains it."""
        if source.file_key in used_file_dict:
            return
        used_file_dict[source.file_key] = True
        file_period = self.file_periods.get(source.file)
        if file_period is None:
            file_period = self.file_periods[source.file] = FilePeriod(source.file)
        file_period.add_period(period)

    def _add_line_period(self, source, period, used_line_dict):
        """Add period to the line of source unless used_line_dict contains it.

        The FilePeriod of source.file must already exist.
        """
        if source.line_key in used_line_dict:
            return
        used_line_dict[source.line_key] = True
        self.file_periods[source.file].add_line_period(source.line, period)

    def _add_function_period(self, source, period, used_function_dict):
        """Add period to the function of source unless used_function_dict
           contains it.

        The FilePeriod of source.file must already exist. source.line is
        passed on as the start line of the function.
        """
        if source.function_key in used_function_dict:
            return
        used_function_dict[source.function_key] = True
        self.file_periods[source.file].add_function_period(
            source.function, source.line, period)

    def _write_summary(self):
        """Write the file 'summary' in the annotate destination directory.

        It lists the total period, then the percentages of each dso and
        each source file (both sorted by accumulated period, descending),
        then for each file its functions (same order) and its lines (by
        line number).
        """
        summary = os.path.join(self.config['annotate_dest_dir'], 'summary')
        with open(summary, 'w') as f:
            f.write('total period: %d\n\n' % self.period)
            dso_periods = sorted(self.dso_periods.values(),
                                 key=lambda x: x.period.acc_period, reverse=True)
            for dso_period in dso_periods:
                f.write('dso %s: %s\n' % (dso_period.dso_name,
                                          self._get_percentage_str(dso_period.period)))
            f.write('\n')

            file_periods = sorted(self.file_periods.values(),
                                  key=lambda x: x.period.acc_period, reverse=True)
            for file_period in file_periods:
                f.write('file %s: %s\n' % (file_period.file,
                                           self._get_percentage_str(file_period.period)))
            for file_period in file_periods:
                f.write('\n\n%s: %s\n' % (file_period.file,
                                          self._get_percentage_str(file_period.period)))
                # Each item is (function name, [start line, Period]).
                functions = sorted(file_period.function_dict.items(),
                                   key=lambda x: x[1][1].acc_period, reverse=True)
                for func_name, (func_start_line, period) in functions:
                    f.write('\tfunction (%s): line %d, %s\n' % (
                        func_name, func_start_line, self._get_percentage_str(period)))
                f.write('\n')
                for line in sorted(file_period.line_dict):
                    f.write('\tline %d: %s\n' % (
                        line, self._get_percentage_str(file_period.line_dict[line])))

    def _get_percentage_str(self, period, short=False):
        """Format the percentages of period, with short or long labels."""
        if short:
            s = 'acc_p: %f%%, p: %f%%'
        else:
            s = 'accumulated_period: %f%%, period: %f%%'
        return s % self._get_percentage(period)

    def _get_percentage(self, period):
        """Return (acc_period, period) of period as percentages of the total.

        Returns (0, 0) when the total period is 0.
        """
        if self.period == 0:
            return (0, 0)
        acc_p = 100.0 * period.acc_period / self.period
        p = 100.0 * period.period / self.period
        return (acc_p, p)

    def _annotate_files(self):
        """Annotate source files: add acc_period/period for each source file.

        Source files that can't be found are skipped with a warning. The
        annotated copy is written below the annotate destination directory,
        at a path derived from the path of the source file. Files ending
        with .java are annotated as java files.
        """
        dest_dir = self.config['annotate_dest_dir']
        for from_path, file_period in self.file_periods.items():
            if not os.path.isfile(from_path):
                log_warning("can't find source file for path %s" % from_path)
                continue
            if from_path.startswith('/'):
                # Drop the leading '/', otherwise os.path.join would
                # discard dest_dir.
                to_path = os.path.join(dest_dir, from_path[1:])
            elif is_windows() and ':\\' in from_path:
                to_path = os.path.join(dest_dir, from_path.replace(':\\', os.sep))
            else:
                to_path = os.path.join(dest_dir, from_path)
            is_java = from_path.endswith('.java')
            self._annotate_file(from_path, to_path, file_period, is_java)

    def _annotate_file(self, from_path, to_path, file_period, is_java):
        """Annotate a source file.

        Annotate a source file in three steps:
          1. In the first line, show periods of this file.
          2. For each function, show periods of this function.
          3. For each line not hitting the same line as functions, show
             line periods.

        Args:
            from_path: path of the source file to read.
            to_path: path of the annotated file to write; its directory is
                created when missing.
            file_period: FilePeriod of the source file.
            is_java: if true, a function is annotated one line above its
                start line.
        """
        log_info('annotate file %s' % from_path)
        with open(from_path, 'r') as rf:
            lines = rf.readlines()

        # Maps a line number (starting from 1) to its annotation text.
        # Later assignments win: functions override lines, the file
        # annotation overrides whatever was on line 1.
        annotates = {}
        for line, line_period in file_period.line_dict.items():
            annotates[line] = self._get_percentage_str(line_period, True)
        for func_start_line, period in file_period.function_dict.values():
            if func_start_line == -1:
                # The start line of the function is unknown.
                continue
            line = func_start_line - 1 if is_java else func_start_line
            annotates[line] = '[func] ' + self._get_percentage_str(period, True)
        annotates[1] = '[file] ' + self._get_percentage_str(file_period.period, True)

        max_annotate_cols = max(len(text) for text in annotates.values())
        # Same width as an annotation wrapped in '/* ' and ' */'.
        empty_annotate = ' ' * (max_annotate_cols + 6)

        dirname = os.path.dirname(to_path)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        with open(to_path, 'w') as wf:
            for line, text in enumerate(lines, 1):
                annotate = annotates.get(line)
                if annotate is not None:
                    annotate = '/* ' + annotate.ljust(max_annotate_cols) + ' */'
                elif text.strip():
                    annotate = empty_annotate
                else:
                    # Don't add trailing spaces to blank lines.
                    annotate = ''
                wf.write(annotate)
                wf.write(text)
459
def main():
    """Parse the command line, then annotate source files accordingly."""
    # Shared by the options which can be repeated and take several values.
    multi_value = {'nargs': '+', 'action': 'append'}

    parser = argparse.ArgumentParser(description="""
        Annotate source files based on profiling data. It reads line information from binary_cache
        generated by app_profiler.py or binary_cache_builder.py, and generate annotated source
        files in annotated_files directory.""")
    parser.add_argument('-i', '--perf_data_list', help="""
        The paths of profiling data. Default is perf.data.""", **multi_value)
    parser.add_argument('-s', '--source_dirs', type=extant_dir, help="""
        Directories to find source files.""", **multi_value)
    parser.add_argument('--comm', help="""
        Use samples only in threads with selected names.""", **multi_value)
    parser.add_argument('--pid', help="""
        Use samples only in processes with selected process ids.""", **multi_value)
    parser.add_argument('--tid', help="""
        Use samples only in threads with selected thread ids.""", **multi_value)
    parser.add_argument('--dso', help="""
        Use samples only in selected binaries.""", **multi_value)
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')
    args = parser.parse_args()

    perf_data_list = flatten_arg_list(args.perf_data_list)
    if not perf_data_list:
        # No -i option was given.
        perf_data_list.append('perf.data')
    config = {
        'perf_data_list': perf_data_list,
        'source_dirs': flatten_arg_list(args.source_dirs),
        'comm_filters': flatten_arg_list(args.comm),
        'pid_filters': flatten_arg_list(args.pid),
        'tid_filters': flatten_arg_list(args.tid),
        'dso_filters': flatten_arg_list(args.dso),
        'ndk_path': args.ndk_path,
    }

    SourceFileAnnotator(config).annotate()
    log_info('annotate finish successfully, please check result in annotated_files/.')
494
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
497