1#!/usr/bin/env python
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""simpleperf_report_lib.py: a python wrapper of libsimpleperf_report.so.
19   Used to access samples in perf.data.
20
21"""
22
23import collections
24import ctypes as ct
25import struct
26from utils import bytes_to_str, get_host_binary_path, is_windows, str_to_bytes
27
28
def _get_native_lib():
    """Return the path of libsimpleperf_report.so as resolved by get_host_binary_path()."""
    lib_name = 'libsimpleperf_report.so'
    return get_host_binary_path(lib_name)
31
32
def _is_null(p):
    """Return True when `p` is falsy and converts to a NULL void pointer.

    Truthy values are reported as non-null without further inspection; a falsy
    value is cast to c_void_p and is null only if the resulting address is None.
    """
    return not p and ct.cast(p, ct.c_void_p).value is None
37
38
def _char_pt(s):
    """Convert `s` with str_to_bytes() so it can be handed to a native function."""
    encoded = str_to_bytes(s)
    return encoded
41
42
def _char_pt_to_str(char_pt):
    """Convert a value read from a native char pointer into a str via bytes_to_str()."""
    text = bytes_to_str(char_pt)
    return text
45
def _check(cond, failmsg):
    """Raise RuntimeError(failmsg) unless `cond` is truthy."""
    if cond:
        return
    raise RuntimeError(failmsg)
49
50
class SampleStruct(ct.Structure):
    """ One sample read from perf.data.

        Fields:
          ip: program counter of the thread that generated the sample.
          pid: process id (thread group id) of that thread.
          tid: thread id.
          thread_comm: thread name (property decoding the raw `_thread_comm` pointer).
          time: time the sample was generated, in nanoseconds. The clock is decided by
                the --clockid option in `simpleperf record`.
          in_kernel: whether the instruction is in kernel space or user space.
          cpu: the cpu that generated the sample.
          period: count of events that have happened since the last sample. With
                  -e cpu-cycles it is a number of cpu cycles; with -e cpu-clock it is
                  a number of nanoseconds.
    """
    _fields_ = [
        ('ip', ct.c_uint64),
        ('pid', ct.c_uint32),
        ('tid', ct.c_uint32),
        ('_thread_comm', ct.c_char_p),
        ('time', ct.c_uint64),
        ('in_kernel', ct.c_uint32),
        ('cpu', ct.c_uint32),
        ('period', ct.c_uint64),
    ]

    @property
    def thread_comm(self):
        """ Thread name as a str. """
        raw_comm = self._thread_comm
        return _char_pt_to_str(raw_comm)
77
78
class TracingFieldFormatStruct(ct.Structure):
    """Format of a tracing field.
       name: name of the field.
       offset: offset of the field in tracing data.
       elem_size: size of the element type.
       elem_count: the number of elements in this field, more than one if the field is an array.
       is_signed: whether the element type is signed or unsigned.
    """
    _fields_ = [('_name', ct.c_char_p),
                ('offset', ct.c_uint32),
                ('elem_size', ct.c_uint32),
                ('elem_count', ct.c_uint32),
                ('is_signed', ct.c_uint32)]

    # Maps an element size in bytes to the signed struct format character; the
    # upper-case form of each character is the unsigned variant.
    _unpack_key_dict = {1: 'b', 2: 'h', 4: 'i', 8: 'q'}

    @property
    def name(self):
        """ Field name as a str. """
        return _char_pt_to_str(self._name)

    def parse_value(self, data):
        """ Parse value of a field in a tracepoint event.
            data: the tracing data of a sample. It must support slicing into byte strings
                  (a ct.POINTER(ct.c_char) or a bytes object both work).
            The return value depends on the type of the field, and can be an int value, a string,
            an array of int values, etc. If the type can't be parsed, return a byte array or an
            array of byte arrays.
        """
        start = self.offset
        if self.elem_count > 1 and self.elem_size == 1:
            # Probably the field is a string.
            # Don't use self.is_signed, which has different values on x86 and arm.
            # Cut at the first NUL on the raw bytes and decode once: decoding one byte at a
            # time cannot handle characters encoded with more than one byte.
            raw = data[start:start + self.elem_count]
            return bytes_to_str(raw.split(b'\x00', 1)[0])
        unpack_key = self._unpack_key_dict.get(self.elem_size)
        if unpack_key:
            if not self.is_signed:
                unpack_key = unpack_key.upper()
            end = start + self.elem_count * self.elem_size
            value = struct.unpack('%d%s' % (self.elem_count, unpack_key), data[start:end])
        else:
            # Since we don't know the element type, just return the bytes.
            size = self.elem_size
            value = [data[start + i * size:start + (i + 1) * size]
                     for i in range(self.elem_count)]
        if self.elem_count == 1:
            # A scalar field is returned as the element itself, not a 1-item sequence.
            value = value[0]
        return value
128
129
class TracingDataFormatStruct(ct.Structure):
    """Layout of the tracing data attached to a tracepoint event, see
       https://www.kernel.org/doc/html/latest/trace/events.html#event-formats.

       Fields:
         size: total size of all fields in the tracing data.
         field_count: the number of fields.
         fields: pointer to an array of TracingFieldFormatStruct, one per field.
    """
    _fields_ = [
        ('size', ct.c_uint32),
        ('field_count', ct.c_uint32),
        ('fields', ct.POINTER(TracingFieldFormatStruct)),
    ]
140
141
class EventStruct(ct.Structure):
    """Event type a sample belongs to.

       Fields:
         name: name of the event type (property decoding the raw `_name` pointer).
         tracing_data_format: only available when it is a tracepoint event.
    """
    _fields_ = [
        ('_name', ct.c_char_p),
        ('tracing_data_format', TracingDataFormatStruct),
    ]

    @property
    def name(self):
        """ Event type name as a str. """
        raw_name = self._name
        return _char_pt_to_str(raw_name)
153
154
class MappingStruct(ct.Structure):
    """ One mapped area of a monitored thread, like a line of /proc/<pid>/maps.

        Fields:
          start: start addr in memory.
          end: end addr in memory.
          pgoff: offset in the mapped shared library.
    """
    _fields_ = [
        ('start', ct.c_uint64),
        ('end', ct.c_uint64),
        ('pgoff', ct.c_uint64),
    ]
164
165
class SymbolStruct(ct.Structure):
    """ Symbol info for the instruction hit by a sample, or by one of its callchain entries.

        Fields:
          dso_name: path of the shared library containing the instruction.
          vaddr_in_file: virtual address of the instruction in the shared library.
          symbol_name: name of the function containing the instruction.
          symbol_addr: start addr of the function containing the instruction.
          symbol_len: length of the function in the shared library.
          mapping: pointer to the mapping area hit by the instruction.
    """
    _fields_ = [
        ('_dso_name', ct.c_char_p),
        ('vaddr_in_file', ct.c_uint64),
        ('_symbol_name', ct.c_char_p),
        ('symbol_addr', ct.c_uint64),
        ('symbol_len', ct.c_uint64),
        ('mapping', ct.POINTER(MappingStruct)),
    ]

    @property
    def dso_name(self):
        """ Shared library path as a str. """
        raw_dso = self._dso_name
        return _char_pt_to_str(raw_dso)

    @property
    def symbol_name(self):
        """ Function name as a str. """
        raw_symbol = self._symbol_name
        return _char_pt_to_str(raw_symbol)
189
190
class CallChainEntryStructure(ct.Structure):
    """ One entry of a sample's callchain.

        Fields:
          ip: the address of the instruction of the callchain entry.
          symbol: symbol info of the callchain entry.
    """
    _fields_ = [
        ('ip', ct.c_uint64),
        ('symbol', SymbolStruct),
    ]
198
199
class CallChainStructure(ct.Structure):
    """ Callchain of a sample.

        Fields:
          nr: number of entries in the callchain.
          entries: a pointer to an array of CallChainEntryStructure.

        Example: a sample is generated while a thread runs function C, reached through
        function A -> function B -> function C. Then nr = 2, and
        entries = [function B, function A].
    """
    _fields_ = [
        ('nr', ct.c_uint32),
        ('entries', ct.POINTER(CallChainEntryStructure)),
    ]
211
212
class FeatureSectionStructure(ct.Structure):
    """ A feature section of perf.data, holding information like record cmd, device arch, etc.

        Fields:
          data: a pointer to a buffer storing the section data.
          data_size: data size in bytes.
    """
    _fields_ = [
        ('data', ct.POINTER(ct.c_char)),
        ('data_size', ct.c_uint32),
    ]
220
221
class ReportLibStructure(ct.Structure):
    """ Opaque type of the native report instance.
        It declares no fields; it is only referenced as ct.POINTER(ReportLibStructure),
        the return type set for the native CreateReportLib function.
    """
    _fields_ = []
224
225
226# pylint: disable=invalid-name
class ReportLib(object):
    """ Python wrapper around one native report instance of libsimpleperf_report.so.

        The Set*/Show*/Merge* methods forward options to the native library,
        GetNextSample() fetches samples one by one, and the Get*OfCurrentSample()
        methods query details of the sample fetched last. Close() destroys the native
        instance; using the object afterwards raises an Exception.
    """

    def __init__(self, native_lib_path=None):
        """ Load the native library and create a report instance.
            native_lib_path: path of the native library. When None, the path returned by
                             _get_native_lib() is used.
        """
        if native_lib_path is None:
            native_lib_path = _get_native_lib()

        self._load_dependent_lib()
        self._lib = ct.CDLL(native_lib_path)
        # ctypes assumes an int return value unless restype is set, so every function
        # returning a pointer or a string declares its restype below.
        self._CreateReportLibFunc = self._lib.CreateReportLib
        self._CreateReportLibFunc.restype = ct.POINTER(ReportLibStructure)
        self._DestroyReportLibFunc = self._lib.DestroyReportLib
        self._SetLogSeverityFunc = self._lib.SetLogSeverity
        self._SetSymfsFunc = self._lib.SetSymfs
        self._SetRecordFileFunc = self._lib.SetRecordFile
        self._SetKallsymsFileFunc = self._lib.SetKallsymsFile
        self._ShowIpForUnknownSymbolFunc = self._lib.ShowIpForUnknownSymbol
        self._ShowArtFramesFunc = self._lib.ShowArtFrames
        self._MergeJavaMethodsFunc = self._lib.MergeJavaMethods
        self._GetNextSampleFunc = self._lib.GetNextSample
        self._GetNextSampleFunc.restype = ct.POINTER(SampleStruct)
        self._GetEventOfCurrentSampleFunc = self._lib.GetEventOfCurrentSample
        self._GetEventOfCurrentSampleFunc.restype = ct.POINTER(EventStruct)
        self._GetSymbolOfCurrentSampleFunc = self._lib.GetSymbolOfCurrentSample
        self._GetSymbolOfCurrentSampleFunc.restype = ct.POINTER(SymbolStruct)
        self._GetCallChainOfCurrentSampleFunc = self._lib.GetCallChainOfCurrentSample
        self._GetCallChainOfCurrentSampleFunc.restype = ct.POINTER(CallChainStructure)
        self._GetTracingDataOfCurrentSampleFunc = self._lib.GetTracingDataOfCurrentSample
        self._GetTracingDataOfCurrentSampleFunc.restype = ct.POINTER(ct.c_char)
        self._GetBuildIdForPathFunc = self._lib.GetBuildIdForPath
        self._GetBuildIdForPathFunc.restype = ct.c_char_p
        self._GetFeatureSection = self._lib.GetFeatureSection
        self._GetFeatureSection.restype = ct.POINTER(FeatureSectionStructure)
        self._instance = self._CreateReportLibFunc()
        assert not _is_null(self._instance)

        # Lazily filled caches, see MetaInfo() and GetRecordCmd().
        self.meta_info = None
        self.current_sample = None
        self.record_cmd = None

    def _load_dependent_lib(self):
        """ Load libraries the native library depends on, before loading it. """
        # As the windows dll is built with mingw we need to load 'libwinpthread-1.dll'.
        if is_windows():
            self._libwinpthread = ct.CDLL(get_host_binary_path('libwinpthread-1.dll'))

    def Close(self):
        """ Destroy the native instance. Calling it again is a no-op. """
        if self._instance is None:
            return
        self._DestroyReportLibFunc(self._instance)
        self._instance = None

    def SetLogSeverity(self, log_level='info'):
        """ Set log severity of native lib, can be verbose,debug,info,error,fatal.
            Raises RuntimeError if the native call reports failure.
        """
        cond = self._SetLogSeverityFunc(self.getInstance(), _char_pt(log_level))
        _check(cond, 'Failed to set log level')

    def SetSymfs(self, symfs_dir):
        """ Set directory used to find symbols.
            Raises RuntimeError if the native call reports failure.
        """
        cond = self._SetSymfsFunc(self.getInstance(), _char_pt(symfs_dir))
        _check(cond, 'Failed to set symbols directory')

    def SetRecordFile(self, record_file):
        """ Set the path of record file, like perf.data.
            Raises RuntimeError if the native call reports failure.
        """
        cond = self._SetRecordFileFunc(self.getInstance(), _char_pt(record_file))
        _check(cond, 'Failed to set record file')

    def ShowIpForUnknownSymbol(self):
        """ Forward the ShowIpForUnknownSymbol option to the native library. """
        self._ShowIpForUnknownSymbolFunc(self.getInstance())

    def ShowArtFrames(self, show=True):
        """ Show frames of internal methods of the Java interpreter. """
        self._ShowArtFramesFunc(self.getInstance(), show)

    def MergeJavaMethods(self, merge=True):
        """ This option merges jitted java methods with the same name but in different jit
            symfiles. If possible, it also merges jitted methods with interpreted methods,
            by mapping jitted methods to their corresponding dex files.
            Side effects:
              It only works at method level, not instruction level.
              It makes symbol.vaddr_in_file and symbol.mapping not accurate for jitted methods.
            Java methods are merged by default.
        """
        self._MergeJavaMethodsFunc(self.getInstance(), merge)

    def SetKallsymsFile(self, kallsym_file):
        """ Set the file path to a copy of the /proc/kallsyms file (for off device decoding).
            Raises RuntimeError if the native call reports failure.
        """
        cond = self._SetKallsymsFileFunc(self.getInstance(), _char_pt(kallsym_file))
        _check(cond, 'Failed to set kallsyms file')

    def GetNextSample(self):
        """ Fetch the next sample and remember it as the current sample.
            Returns a SampleStruct, or None when the native library returns a null pointer.
        """
        psample = self._GetNextSampleFunc(self.getInstance())
        if _is_null(psample):
            self.current_sample = None
        else:
            self.current_sample = psample[0]
        return self.current_sample

    def GetCurrentSample(self):
        """ Return the value of the last GetNextSample() call (None before the first call). """
        return self.current_sample

    def GetEventOfCurrentSample(self):
        """ Return the EventStruct of the current sample. """
        event = self._GetEventOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(event)
        return event[0]

    def GetSymbolOfCurrentSample(self):
        """ Return the SymbolStruct of the current sample. """
        symbol = self._GetSymbolOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(symbol)
        return symbol[0]

    def GetCallChainOfCurrentSample(self):
        """ Return the CallChainStructure of the current sample. """
        callchain = self._GetCallChainOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(callchain)
        return callchain[0]

    def GetTracingDataOfCurrentSample(self):
        """ Return the tracing data of the current sample as an OrderedDict mapping each
            field name to its parsed value, in the order of the event's field list.
            Returns None when the native library returns no tracing data.
        """
        data = self._GetTracingDataOfCurrentSampleFunc(self.getInstance())
        if _is_null(data):
            return None
        event = self.GetEventOfCurrentSample()
        result = collections.OrderedDict()
        for i in range(event.tracing_data_format.field_count):
            field = event.tracing_data_format.fields[i]
            result[field.name] = field.parse_value(data)
        return result

    def GetBuildIdForPath(self, path):
        """ Return the build id the native library reports for `path`, as a str. """
        build_id = self._GetBuildIdForPathFunc(self.getInstance(), _char_pt(path))
        assert not _is_null(build_id)
        return _char_pt_to_str(build_id)

    def _read_feature_section(self, feature_name):
        """ Copy a feature section into a byte string.
            Returns None when the native library has no such section, otherwise the
            data_size bytes the section's data pointer refers to.
        """
        section = self._GetFeatureSection(self.getInstance(), _char_pt(feature_name))
        if _is_null(section):
            return None
        data = section[0].data
        size = section[0].data_size
        if not data or size == 0:
            return b''
        # Slicing a pointer to c_char yields a byte string, so the section is copied once
        # and all later parsing is bounded by data_size.
        return data[0:size]

    @staticmethod
    def _split_cmdline_args(raw):
        """ Split the bytes of a 'cmdline' section into a list of byte-string arguments.
            Layout: a native-endian uint32 argument count, then for every argument a uint32
            length followed by that many bytes. NUL bytes inside an argument are dropped.
            Parsing stops early if the data ends before all arguments are read.
        """
        if len(raw) < 4:
            return []
        (arg_count,) = struct.unpack_from('=I', raw, 0)
        pos = 4
        args = []
        for _ in range(arg_count):
            if pos + 4 > len(raw):
                break
            (str_len,) = struct.unpack_from('=I', raw, pos)
            pos += 4
            args.append(raw[pos:pos + str_len].replace(b'\x00', b''))
            pos += str_len
        return args

    @staticmethod
    def _parse_length_prefixed_string(raw):
        """ Return the byte string stored as a native-endian uint32 length followed by that
            many bytes, cut at the first NUL. Returns b'' if `raw` is too short.
        """
        if len(raw) < 4:
            return b''
        (str_len,) = struct.unpack_from('=I', raw, 0)
        return raw[4:4 + str_len].split(b'\x00', 1)[0]

    @staticmethod
    def _split_meta_info(raw):
        """ Split the bytes of a 'meta_info' section into a list of (key, value) pairs.
            The section is a sequence of NUL-terminated strings, alternating key and value.
            Data after the last NUL and a trailing key without a value are ignored.
        """
        # The last element of split() is whatever follows the final NUL, i.e. not a
        # terminated string, so it is dropped.
        items = raw.split(b'\x00')[:-1]
        return list(zip(items[0::2], items[1::2]))

    def GetRecordCmd(self):
        """ Return the record command line stored in the 'cmdline' feature section.
            Arguments are joined with spaces; an argument containing a space is wrapped in
            double quotes. The result is cached; it is '' when there is no such section.
        """
        if self.record_cmd is not None:
            return self.record_cmd
        self.record_cmd = ''
        raw = self._read_feature_section('cmdline')
        if raw is not None:
            args = []
            for raw_arg in self._split_cmdline_args(raw):
                arg = bytes_to_str(raw_arg)
                if ' ' in arg:
                    arg = '"' + arg + '"'
                args.append(arg)
            self.record_cmd = ' '.join(args)
        return self.record_cmd

    def _GetFeatureString(self, feature_name):
        """ Return the string stored in a feature section, or '' if the section is missing. """
        raw = self._read_feature_section(feature_name)
        if raw is None:
            return ''
        return bytes_to_str(self._parse_length_prefixed_string(raw))

    def GetArch(self):
        """ Return the string stored in the 'arch' feature section. """
        return self._GetFeatureString('arch')

    def MetaInfo(self):
        """ Return a string to string map stored in meta_info section in perf.data.
            It is used to pass some short meta information.
            The map is built on the first call and cached.
        """
        if self.meta_info is None:
            self.meta_info = {}
            raw = self._read_feature_section('meta_info')
            if raw is not None:
                for key, value in self._split_meta_info(raw):
                    self.meta_info[bytes_to_str(key)] = bytes_to_str(value)
        return self.meta_info

    def getInstance(self):
        """ Return the native instance pointer. Raises Exception after Close(). """
        if self._instance is None:
            raise Exception('Instance is Closed')
        return self._instance
428