1#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be
19    used by pprof.
20
21  Example:
22    python app_profiler.py
23    python pprof_proto_generator.py
24    pprof -text pprof.profile
25"""
26
27from __future__ import print_function
28import argparse
29import os
30import os.path
31
32from simpleperf_report_lib import ReportLib
33from utils import Addr2Nearestline, extant_dir, find_real_dso_path, find_tool_path, flatten_arg_list
34from utils import log_info, log_exit, ReadElf
35try:
36    import profile_pb2
37except ImportError:
38    log_exit('google.protobuf module is missing. Please install it first.')
39
def load_pprof_profile(filename):
    """Read a serialized pprof profile from `filename` and return the parsed Profile message."""
    with open(filename, "rb") as profile_file:
        serialized = profile_file.read()
    parsed = profile_pb2.Profile()
    parsed.ParseFromString(serialized)
    return parsed
45
46
def store_pprof_profile(filename, profile):
    """Serialize `profile` and write the resulting bytes to `filename`."""
    with open(filename, 'wb') as out_file:
        serialized = profile.SerializeToString()
        out_file.write(serialized)
50
51
class PprofProfilePrinter(object):
    """Print the contents of a pprof Profile message to stdout in a readable text form.

    Locations, mappings and functions referenced by id are looked up by position
    (id N is read from index N - 1), so ids are expected to be numbered from 1 in order.
    An id of 0 is treated as "unset" and nothing is printed for it.
    """

    def __init__(self, profile):
        """Remember the profile to print and keep a shortcut to its string table."""
        self.profile = profile
        self.string_table = profile.string_table

    def show(self):
        """Print every section of the profile, each preceded by its element count."""
        p = self.profile
        sub_space = '  '
        print('Profile {')
        print('%d sample_types' % len(p.sample_type))
        for i, sample_type in enumerate(p.sample_type):
            print('sample_type[%d] = ' % i, end='')
            self.show_value_type(sample_type)
        print('%d samples' % len(p.sample))
        for i, sample in enumerate(p.sample):
            print('sample[%d]:' % i)
            self.show_sample(sample, sub_space)
        print('%d mappings' % len(p.mapping))
        for i, mapping in enumerate(p.mapping):
            print('mapping[%d]:' % i)
            self.show_mapping(mapping, sub_space)
        print('%d locations' % len(p.location))
        for i, location in enumerate(p.location):
            print('location[%d]:' % i)
            self.show_location(location, sub_space)
        # Print a count header here as well, like every other section does.
        print('%d functions' % len(p.function))
        for i, function in enumerate(p.function):
            print('function[%d]:' % i)
            self.show_function(function, sub_space)
        print('%d strings' % len(p.string_table))
        for i, string in enumerate(p.string_table):
            print('string[%d]: %s' % (i, string))
        print('drop_frames: %s' % self.string(p.drop_frames))
        print('keep_frames: %s' % self.string(p.keep_frames))
        print('time_nanos: %u' % p.time_nanos)
        print('duration_nanos: %u' % p.duration_nanos)
        print('period_type: ', end='')
        self.show_value_type(p.period_type)
        print('period: %u' % p.period)
        for i, comment in enumerate(p.comment):
            print('comment[%d] = %s' % (i, self.string(comment)))
        print('default_sample_type: %d' % p.default_sample_type)
        print('} // Profile')
        print()

    def show_value_type(self, value_type, space=''):
        """Print a ValueType as both string ids and the strings they refer to."""
        print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' %
              (space, value_type.type, value_type.unit,
               self.string(value_type.type), self.string(value_type.unit)))

    def show_sample(self, sample, space=''):
        """Print the locations, values and label slots of one sample."""
        sub_space = space + '  '
        for i, location_id in enumerate(sample.location_id):
            print('%slocation_id[%d]: id %d' % (space, i, location_id))
            self.show_location_id(location_id, sub_space)
        for i, value in enumerate(sample.value):
            print('%svalue[%d] = %d' % (space, i, value))
        for i in range(len(sample.label)):
            # The arguments must be applied with '%'; passing them as a second print()
            # argument would output the raw format string followed by the tuple.
            print('%slabel[%d] = ' % (space, i))

    def show_location_id(self, location_id, space=''):
        """Print the location with the given 1-based id; do nothing for id 0."""
        if location_id > 0:
            self.show_location(self.profile.location[location_id - 1], space)

    def show_location(self, location, space=''):
        """Print one location, including its mapping and all of its line records."""
        sub_space = space + '  '
        print('%sid: %d' % (space, location.id))
        print('%smapping_id: %d' % (space, location.mapping_id))
        self.show_mapping_id(location.mapping_id, sub_space)
        print('%saddress: %x' % (space, location.address))
        for i, line in enumerate(location.line):
            print('%sline[%d]:' % (space, i))
            self.show_line(line, sub_space)

    def show_mapping_id(self, mapping_id, space=''):
        """Print the mapping with the given 1-based id; do nothing for id 0."""
        # Without this check, id 0 would index [-1] and print the unrelated last mapping.
        if mapping_id > 0:
            self.show_mapping(self.profile.mapping[mapping_id - 1], space)

    def show_mapping(self, mapping, space=''):
        """Print all fields of one mapping."""
        print('%sid: %d' % (space, mapping.id))
        print('%smemory_start: %x' % (space, mapping.memory_start))
        print('%smemory_limit: %x' % (space, mapping.memory_limit))
        print('%sfile_offset: %x' % (space, mapping.file_offset))
        print('%sfilename: %s(%d)' % (space, self.string(mapping.filename),
                                      mapping.filename))
        print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id),
                                      mapping.build_id))
        print('%shas_functions: %s' % (space, mapping.has_functions))
        print('%shas_filenames: %s' % (space, mapping.has_filenames))
        print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers))
        print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames))

    def show_line(self, line, space=''):
        """Print one line record together with the function it refers to."""
        sub_space = space + '  '
        print('%sfunction_id: %d' % (space, line.function_id))
        self.show_function_id(line.function_id, sub_space)
        print('%sline: %d' % (space, line.line))

    def show_function_id(self, function_id, space=''):
        """Print the function with the given 1-based id; do nothing for id 0."""
        if function_id > 0:
            self.show_function(self.profile.function[function_id - 1], space)

    def show_function(self, function, space=''):
        """Print all fields of one function."""
        print('%sid: %d' % (space, function.id))
        print('%sname: %s' % (space, self.string(function.name)))
        print('%ssystem_name: %s' % (space, self.string(function.system_name)))
        print('%sfilename: %s' % (space, self.string(function.filename)))
        print('%sstart_line: %d' % (space, function.start_line))

    def string(self, string_id):
        """Return the string stored at index `string_id` of the profile's string table."""
        return self.string_table[string_id]
163
164
class Sample(object):
    """A call stack (list of location ids) with values accumulated per sample type id."""

    def __init__(self):
        self.values = {}
        self.location_ids = []

    def add_location_id(self, location_id):
        """Append one location id to the end of the stack."""
        self.location_ids.append(location_id)

    def add_value(self, sample_type_id, value):
        """Add `value` to the running total kept for `sample_type_id`."""
        current_total = self.values.get(sample_type_id, 0)
        self.values[sample_type_id] = current_total + value

    def add_values(self, values):
        """Add every (sample type id, value) pair of the dict `values` to the totals."""
        for type_id in values:
            self.add_value(type_id, values[type_id])

    @property
    def key(self):
        """Hashable identity of the sample: its location ids as a tuple."""
        stack = tuple(self.location_ids)
        return stack
184
185
class Location(object):
    """An address inside a mapping, plus the line records attached to it."""

    def __init__(self, mapping_id, address, vaddr_in_dso):
        self.lines = []
        self.vaddr_in_dso = vaddr_in_dso
        self.address = address
        self.mapping_id = mapping_id
        self.id = -1  # unset

    @property
    def key(self):
        """Identity of the location: the pair (mapping_id, address)."""
        identity = (self.mapping_id, self.address)
        return identity
198
199
class Line(object):
    """A (function id, line number) record; both fields start at 0."""

    def __init__(self):
        self.line = 0
        self.function_id = 0
205
206
class Mapping(object):
    """A memory range backed by a file, identified by string ids for file name and build id."""

    def __init__(self, start, end, pgoff, filename_id, build_id_id):
        self.build_id_id = build_id_id
        self.filename_id = filename_id
        self.file_offset = pgoff
        self.memory_limit = end
        self.memory_start = start
        self.id = -1  # unset

    @property
    def key(self):
        """Identity of the mapping: range, file offset, file name id and build id id."""
        identity = (self.memory_start, self.memory_limit, self.file_offset,
                    self.filename_id, self.build_id_id)
        return identity
225
226
class Function(object):
    """A function identified by the string ids of its name and of its dso name."""

    def __init__(self, name_id, dso_name_id, vaddr_in_dso):
        self.start_line = 0
        self.source_filename_id = 0
        self.vaddr_in_dso = vaddr_in_dso
        self.dso_name_id = dso_name_id
        self.name_id = name_id
        self.id = -1  # unset

    @property
    def key(self):
        """Identity of the function: the pair (name_id, dso_name_id)."""
        identity = (self.name_id, self.dso_name_id)
        return identity
240
241
242# pylint: disable=no-member
class PprofProfileGenerator(object):
    """Aggregate samples read through ReportLib and build a pprof Profile message.

    Usage: create the generator with a config dict, call load_record_file() once per
    record file, then call gen() to get the populated profile.
    """

    def __init__(self, config):
        """Set up the filters and the empty lookup tables used while aggregating.

        Args:
            config: dict of options. 'max_chain_length' and 'ndk_path' must be present;
                'comm_filters', 'pid_filters', 'tid_filters', 'dso_filters' and
                'show_art_frames' are optional. config['binary_cache_dir'] is overwritten:
                it becomes 'binary_cache' when that directory exists, otherwise None.
        """
        self.config = config
        self.lib = None

        config['binary_cache_dir'] = 'binary_cache'
        if not os.path.isdir(config['binary_cache_dir']):
            config['binary_cache_dir'] = None
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None
        self.max_chain_length = config['max_chain_length']
        self.profile = profile_pb2.Profile()
        # Index 0 of the string table is reserved for the empty string.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

        # Map from dso_name in perf.data to (binary path, build_id).
        self.binary_map = {}
        self.read_elf = ReadElf(self.config['ndk_path'])

    def load_record_file(self, record_file):
        """Read all samples of `record_file` and merge them into the aggregated samples.

        Samples rejected by the comm/pid/tid filters are skipped. For the remaining ones,
        the sampled address and at most the last `max_chain_length` callchain entries are
        turned into location ids; each frame is kept only if its own dso passes the dso
        filter. A sample without any remaining location is dropped.
        """
        self.lib = ReportLib()
        self.lib.SetRecordFile(record_file)

        if self.config['binary_cache_dir']:
            self.lib.SetSymfs(self.config['binary_cache_dir'])
            kallsyms = os.path.join(self.config['binary_cache_dir'], 'kallsyms')
            if os.path.isfile(kallsyms):
                self.lib.SetKallsymsFile(kallsyms)

        if self.config.get('show_art_frames'):
            self.lib.ShowArtFrames()

        # Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                self.lib.Close()
                self.lib = None
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()

            if not self._filter_report_sample(report_sample):
                continue

            # Each event owns two consecutive sample types: sample count, then event count.
            sample_type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            sample.add_value(sample_type_id, 1)
            sample.add_value(sample_type_id + 1, report_sample.period)
            if self._filter_symbol(symbol):
                location_id = self.get_location_id(report_sample.ip, symbol)
                sample.add_location_id(location_id)
            for i in range(max(0, callchain.nr - self.max_chain_length), callchain.nr):
                entry = callchain.entries[i]
                # Filter on the symbol of this callchain entry, not on the symbol of the
                # sampled address: every frame has to be checked against its own dso.
                if self._filter_symbol(entry.symbol):
                    location_id = self.get_location_id(entry.ip, entry.symbol)
                    sample.add_location_id(location_id)
            if sample.location_ids:
                self.add_sample(sample)

    def gen(self):
        """Fill the Profile message from the aggregated data and return it."""
        # 1. Generate line info for locations and functions.
        self.gen_source_lines()

        # 2. Produce samples/locations/functions in profile.
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_report_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter:
            if sample.thread_comm not in self.comm_filter:
                return False
        if self.pid_filter:
            if sample.pid not in self.pid_filter:
                return False
        if self.tid_filter:
            if sample.tid not in self.tid_filter:
                return False
        return True

    def _filter_symbol(self, symbol):
        """Return true if there is no dso filter or the symbol's dso is in the filter."""
        if not self.dso_filter or symbol.dso_name in self.dso_filter:
            return True
        return False

    def get_string_id(self, str_value):
        """Return the string table index of `str_value`, adding it when it is new.

        Empty values map to index 0.
        """
        if not str_value:
            return 0
        str_id = self.string_table.get(str_value)
        if str_id is not None:
            return str_id
        # self.string_table does not hold the '' entry stored at index 0, hence the + 1.
        str_id = len(self.string_table) + 1
        self.string_table[str_value] = str_id
        self.profile.string_table.append(str_value)
        return str_id

    def get_string(self, str_id):
        """Return the string stored at index `str_id` of the profile's string table."""
        return self.profile.string_table[str_id]

    def get_sample_type_id(self, name):
        """Return the first of the two sample type ids belonging to event `name`.

        A new event registers two sample types: 'event_<name>_samples' at the returned id
        and 'event_<name>_count' at the returned id + 1.
        """
        sample_type_id = self.sample_types.get(name)
        if sample_type_id is not None:
            return sample_type_id
        sample_type_id = len(self.profile.sample_type)
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_samples')
        sample_type.unit = self.get_string_id('count')
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_count')
        sample_type.unit = self.get_string_id('count')
        self.sample_types[name] = sample_type_id
        return sample_type_id

    def get_location_id(self, ip, symbol):
        """Return the id of the location for address `ip` in the mapping of `symbol`.

        The location is created when no location with the same (mapping id, address)
        exists yet. Mapping and function entries are registered as a side effect.
        """
        binary_path, build_id = self.get_binary(symbol.dso_name)
        mapping_id = self.get_mapping_id(symbol.mapping[0], binary_path, build_id)
        location = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, binary_path, symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # Default line info only contains the function name
            line = Line()
            line.function_id = function_id
            location.lines.append(line)

        exist_location = self.location_map.get(location.key)
        if exist_location:
            return exist_location.id
        # location_id starts from 1
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        return location.id

    def get_mapping_id(self, report_mapping, filename, build_id):
        """Return the id of the mapping matching `report_mapping`, creating it if needed."""
        filename_id = self.get_string_id(filename)
        build_id_id = self.get_string_id(build_id)
        mapping = Mapping(report_mapping.start, report_mapping.end,
                          report_mapping.pgoff, filename_id, build_id_id)
        exist_mapping = self.mapping_map.get(mapping.key)
        if exist_mapping:
            return exist_mapping.id
        # mapping_id starts from 1
        mapping.id = len(self.mapping_list) + 1
        self.mapping_list.append(mapping)
        self.mapping_map[mapping.key] = mapping
        return mapping.id

    def get_binary(self, dso_name):
        """ Return (binary_path, build_id) for a given dso_name. """
        value = self.binary_map.get(dso_name)
        if value:
            return value

        binary_path = dso_name
        build_id = ''

        # The build ids in perf.data are padded to 20 bytes, but pprof needs them without
        # padding. So read the build id from the binary in binary_cache, and check it
        # against the build id in perf.data.
        build_id_in_perf_data = self.lib.GetBuildIdForPath(dso_name)
        # Try elf_path in binary cache.
        elf_path = find_real_dso_path(dso_name, self.config['binary_cache_dir'])
        if elf_path:
            elf_build_id = self.read_elf.get_build_id(elf_path, False)
            if build_id_in_perf_data:
                match = build_id_in_perf_data == self.read_elf.pad_build_id(elf_build_id)
            else:
                # odex files generated by ART on Android O don't contain build id.
                match = not elf_build_id
            if match:
                build_id = elf_build_id
                binary_path = elf_path

        # When there is no matching elf_path, try converting build_id in perf.data.
        if not build_id and build_id_in_perf_data.startswith('0x'):
            # Fallback to the way used by TrimZeroesFromBuildIDString() in quipper.
            build_id = build_id_in_perf_data[2:]  # remove '0x'
            padding = '0' * 8
            while build_id.endswith(padding):
                build_id = build_id[:-len(padding)]

        self.binary_map[dso_name] = (binary_path, build_id)
        return (binary_path, build_id)

    def get_mapping(self, mapping_id):
        """Return the Mapping with the given 1-based id, or None when the id is not positive."""
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the id of the function (name, dso_name), creating it if needed.

        Returns 0 for the name 'unknown', for which no function is registered.
        """
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        """Return the Function with the given 1-based id, or None when the id is not positive."""
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        """Merge `sample` into an existing sample with the same call stack, or store it."""
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self):
        """Attach source file and line information to locations and functions.

        Does nothing except logging when binary_cache or llvm-symbolizer is unavailable.
        """
        # 1. Create Addr2line instance
        if not self.config.get('binary_cache_dir'):
            log_info("Can't generate line information because binary_cache is missing.")
            return
        if not find_tool_path('llvm-symbolizer', self.config['ndk_path']):
            log_info("Can't generate line information because can't find llvm-symbolizer.")
            return
        # We have changed dso names to paths in binary_cache in self.get_binary(). So no need to
        # pass binary_cache_dir to addr2line.
        addr2line = Addr2Nearestline(self.config['ndk_path'], None, True)

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            if location.lines:
                function = self.get_function(location.lines[0].function_id)
                addr2line.add_addr(dso_name, function.vaddr_in_dso, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            addr2line.add_addr(dso_name, function.vaddr_in_dso, function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines()

        # 4. Annotate locations and functions.
        for location in self.location_list:
            if not location.lines:
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
            if not sources:
                continue
            for (source_id, source) in enumerate(sources):
                source_file, source_line, function_name = source
                function_id = self.get_function_id(function_name, dso_name, 0)
                if function_id == 0:
                    continue
                if source_id == 0:
                    # Clear default line info
                    location.lines = []
                location.lines.append(self.add_line(source_file, source_line, function_id))

        # Functions registered in the loop above have vaddr_in_dso 0 and are skipped here.
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            if function.vaddr_in_dso:
                dso = addr2line.get_dso(dso_name)
                if not dso:
                    continue
                sources = addr2line.get_addr_source(dso, function.vaddr_in_dso)
                if sources:
                    source_file, source_line, _ = sources[0]
                    function.source_filename_id = self.get_string_id(source_file)
                    function.start_line = source_line

    def add_line(self, source_file, source_line, function_id):
        """Create a Line for `function_id` and record `source_file` on that function."""
        line = Line()
        function = self.get_function(function_id)
        function.source_filename_id = self.get_string_id(source_file)
        line.function_id = function_id
        line.line = source_line
        return line

    def gen_profile_sample(self, sample):
        """Append `sample` to the profile, with one value slot per registered sample type."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Every event contributes two sample types; types without a value stay 0.
        sample_type_count = len(self.sample_types) * 2
        values = [0] * sample_type_count
        for sample_type_id, value in sample.values.items():
            values[sample_type_id] = value
        profile_sample.value.extend(values)

    def gen_profile_mapping(self, mapping):
        """Append `mapping` to the profile."""
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        # Line numbers and inline frames are only reported when binary_cache is available.
        if self.config.get('binary_cache_dir'):
            profile_mapping.has_line_numbers = True
            profile_mapping.has_inline_frames = True
        else:
            profile_mapping.has_line_numbers = False
            profile_mapping.has_inline_frames = False

    def gen_profile_location(self, location):
        """Append `location` and all of its line records to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for source_line in location.lines:
            line = profile_location.line.add()
            line.function_id = source_line.function_id
            line.line = source_line.line

    def gen_profile_function(self, function):
        """Append `function` to the profile; name and system_name share the same string."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line
594
595
def main():
    """Command-line entry point: print an existing profile or generate a new one."""
    parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.')
    parser.add_argument('--show', nargs='?', action='append', help='print existing pprof.profile.')
    parser.add_argument('-i', '--record_file', nargs='+', default=['perf.data'], help="""
        Set profiling data file to report. Default is perf.data""")
    parser.add_argument('-o', '--output_file', default='pprof.profile', help="""
        The path of generated pprof profile data.""")
    parser.add_argument('--comm', nargs='+', action='append', help="""
        Use samples only in threads with selected names.""")
    parser.add_argument('--pid', nargs='+', action='append', help="""
        Use samples only in processes with selected process ids.""")
    parser.add_argument('--tid', nargs='+', action='append', help="""
        Use samples only in threads with selected thread ids.""")
    parser.add_argument('--dso', nargs='+', action='append', help="""
        Use samples only in selected binaries.""")
    parser.add_argument('--max_chain_length', type=int, default=1000000000, help="""
        Maximum depth of samples to be converted.""")  # Large value as infinity standin.
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')
    parser.add_argument('--show_art_frames', action='store_true',
                        help='Show frames of internal methods in the ART Java interpreter.')
    args = parser.parse_args()

    # Show mode: dump an existing profile and stop without generating anything.
    if args.show:
        # Fall back to the default file name when --show carries no usable value.
        show_file = args.show[0] or 'pprof.profile'
        existing_profile = load_pprof_profile(show_file)
        PprofProfilePrinter(existing_profile).show()
        return

    # Generate mode: collect the options, aggregate every record file, write the result.
    config = {
        'output_file': args.output_file,
        'comm_filters': flatten_arg_list(args.comm),
        'pid_filters': flatten_arg_list(args.pid),
        'tid_filters': flatten_arg_list(args.tid),
        'dso_filters': flatten_arg_list(args.dso),
        'ndk_path': args.ndk_path,
        'show_art_frames': args.show_art_frames,
        'max_chain_length': args.max_chain_length,
    }
    generator = PprofProfileGenerator(config)
    for record_file in args.record_file:
        generator.load_record_file(record_file)
    generated_profile = generator.gen()
    store_pprof_profile(config['output_file'], generated_profile)
639
640
# Run the command-line tool only when this file is executed as a script.
if __name__ == '__main__':
    main()
643