1#!/usr/bin/env python
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""utils.py: export utility functions.
19"""
20
21from __future__ import print_function
22import argparse
23import logging
24import os
25import os.path
26import re
27import shutil
28import subprocess
29import sys
30import time
31
def get_script_dir():
    """ Return the directory holding this script, with symlinks resolved. """
    script_path = os.path.realpath(__file__)
    return os.path.dirname(script_path)
34
def is_windows():
    """ Return True when running on native Windows or under cygwin. """
    return sys.platform in ('win32', 'cygwin')
37
def is_darwin():
    """ Return True when sys.platform reports 'darwin' (macOS). """
    current_platform = sys.platform
    return current_platform == 'darwin'
40
def get_platform():
    """ Return the host platform name: 'windows', 'darwin', or 'linux' for everything else. """
    if is_windows():
        platform_name = 'windows'
    elif is_darwin():
        platform_name = 'darwin'
    else:
        platform_name = 'linux'
    return platform_name
47
def is_python3():
    """ Return True when the running interpreter is Python 3 or newer. """
    major_version = sys.version_info[0]
    return major_version >= 3
50
51
def log_debug(msg):
    """ Log msg at DEBUG level through the root logger. """
    logging.log(logging.DEBUG, msg)
54
55
def log_info(msg):
    """ Log msg at INFO level through the root logger. """
    logging.log(logging.INFO, msg)
58
59
def log_warning(msg):
    """ Log msg at WARNING level through the root logger. """
    logging.log(logging.WARNING, msg)
62
63
def log_fatal(msg):
    """ Report an unrecoverable error by raising Exception(msg). Never returns. """
    fatal_error = Exception(msg)
    raise fatal_error
66
def log_exit(msg):
    """ Terminate the script by raising SystemExit carrying msg. Never returns. """
    raise SystemExit(msg)
69
def disable_debug_log():
    """ Raise the root logger threshold to WARNING, hiding debug and info messages. """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.WARNING)
72
def set_log_level(level_name):
    """ Set the root logger level from a name: 'debug', 'info' or 'warning'.
        Any other name is reported through log_fatal().
    """
    levels_by_name = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
    }
    if level_name not in levels_by_name:
        log_fatal('unknown log level: %s' % level_name)
    logging.getLogger().setLevel(levels_by_name[level_name])
83
def str_to_bytes(str_value):
    """ Convert a str to the 8 bit string form expected by the C api.
        Python 2 str values are returned untouched.
    """
    if is_python3():
        # Python 3 str are wide strings, so they have to be encoded. utf-8 is used for now.
        return str_value.encode('utf-8')
    return str_value
90
def bytes_to_str(bytes_value):
    """ Convert bytes to str, decoding as utf-8 on Python 3.
        Empty or None input gives ''.
    """
    if not bytes_value:
        return ''
    return bytes_value.decode('utf-8') if is_python3() else bytes_value
97
def get_target_binary_path(arch, binary_name):
    """ Return the path of an Android target binary shipped under bin/android/<arch>/
        next to this script. 'aarch64' is treated as 'arm64'. A missing arch directory
        or binary is reported through log_fatal().
    """
    arch_name = 'arm64' if arch == 'aarch64' else arch
    arch_dir = os.path.join(get_script_dir(), "bin", "android", arch_name)
    if not os.path.isdir(arch_dir):
        log_fatal("can't find arch directory: %s" % arch_dir)
    target_path = os.path.join(arch_dir, binary_name)
    if not os.path.isfile(target_path):
        log_fatal("can't find binary: %s" % target_path)
    return target_path
108
109
def get_host_binary_path(binary_name):
    """ Return the path of a host binary shipped under bin/<platform>/<x86_64|x86>/
        next to this script.

        A '.so' name is mapped to '.dll' on Windows and '.dylib' on macOS, and a
        name without any '.' gets '.exe' appended on Windows. A missing binary is
        reported through log_fatal().
    """
    if is_windows():
        platform_dir = 'windows'
        if binary_name.endswith('.so'):
            binary_name = binary_name[0:-3] + '.dll'
        elif '.' not in binary_name:
            binary_name += '.exe'
    elif sys.platform == 'darwin': # OSX
        platform_dir = 'darwin'
        if binary_name.endswith('.so'):
            binary_name = binary_name[0:-3] + '.dylib'
    else:
        platform_dir = 'linux'
    # Choose the 64 bit or 32 bit directory to match the running interpreter.
    bits_dir = 'x86_64' if sys.maxsize > 2 ** 32 else 'x86'
    host_path = os.path.join(get_script_dir(), 'bin', platform_dir, bits_dir, binary_name)
    if not os.path.isfile(host_path):
        log_fatal("can't find binary: %s" % host_path)
    return host_path
129
130
def is_executable_available(executable, option='--help'):
    """ Run `executable option` and report whether it started and exited with status 0.
        The output of the command is captured and discarded.
    """
    command = [executable, option]
    try:
        proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate()
    except OSError:
        # The executable doesn't exist or can't be started.
        return False
    return proc.returncode == 0
140
# Default ndk installation directory for each host platform, relative to the home directory.
DEFAULT_NDK_PATH = dict(
    darwin='Library/Android/sdk/ndk',
    linux='Android/Sdk/ndk',
    windows='AppData/Local/Android/sdk/ndk',
)
146
# Tools that find_tool_path() knows how to locate. Fields of each entry:
#   is_binutils: True if the tool name gets an arch specific prefix in the ndk.
#   test_option: option used to probe the tool ('--help' when absent).
#   path_in_ndk: for non-binutils tools, maps a platform name to the path relative to the ndk.
#   accept_tool_without_arch: True if the unprefixed tool found in $PATH is acceptable.
EXPECTED_TOOLS = {
    'adb': dict(
        is_binutils=False,
        test_option='version',
        path_in_ndk=lambda _: '../platform-tools/adb',
    ),
    'readelf': dict(is_binutils=True, accept_tool_without_arch=True),
    'llvm-symbolizer': dict(
        is_binutils=False,
        path_in_ndk=(
            lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform),
    ),
    'objdump': dict(is_binutils=True),
    'strip': dict(is_binutils=True),
}
169
170def _get_binutils_path_in_ndk(toolname, arch, platform):
171    if not arch:
172        arch = 'arm64'
173    if arch == 'arm64':
174        name = 'aarch64-linux-android-' + toolname
175    elif arch == 'arm':
176        name = 'arm-linux-androideabi-' + toolname
177    elif arch == 'x86_64':
178        name = 'x86_64-linux-android-' + toolname
179    elif arch == 'x86':
180        name = 'i686-linux-android-' + toolname
181    else:
182        log_fatal('unexpected arch %s' % arch)
183    path = 'toolchains/llvm/prebuilt/%s-x86_64/bin/%s' % (platform, name)
184    return (name, path)
185
def _ndk_version_sort_key(version_name):
    """ Build a sort key ordering ndk version directory names numerically.

        Plain string comparison ranks '9.0.1' above '21.3.6528147'. Comparing the numbers
        embedded in the name as integers avoids that. Names without digits sort lowest,
        and the name itself is used as the tie breaker.
    """
    return ([int(number) for number in re.findall(r'\d+', version_name)], version_name)


def find_tool_path(toolname, ndk_path=None, arch=None):
    """ Find a runnable copy of one of the tools listed in EXPECTED_TOOLS.

        The search order is:
          1. the given ndk_path,
          2. the ndk located one directory above the current working directory,
          3. the default ndk installation directory of the host platform, trying the
             installed versions from newest to oldest,
          4. $PATH, using the tool name with arch prefix,
          5. $PATH, using the plain tool name, if the tool accepts that.
        A candidate is accepted when is_executable_available() succeeds on it.

        toolname: a key of EXPECTED_TOOLS.
        ndk_path: optional ndk directory to search first.
        arch: target arch used to select binutils tools ('arm64' when not given).
        Return the path (or command name) of the tool, or None if toolname is unknown
        or no working copy is found.
    """
    if toolname not in EXPECTED_TOOLS:
        return None
    tool_info = EXPECTED_TOOLS[toolname]
    is_binutils = tool_info['is_binutils']
    test_option = tool_info.get('test_option', '--help')
    platform = get_platform()
    if is_binutils:
        toolname_with_arch, path_in_ndk = _get_binutils_path_in_ndk(toolname, arch, platform)
    else:
        toolname_with_arch = toolname
        path_in_ndk = tool_info['path_in_ndk'](platform)
    path_in_ndk = path_in_ndk.replace('/', os.sep)

    # 1. Find tool in the given ndk path.
    if ndk_path:
        path = os.path.join(ndk_path, path_in_ndk)
        if is_executable_available(path, test_option):
            return path

    # 2. Find tool in the ndk directory containing simpleperf scripts.
    # NOTE(review): '..' is resolved against the current working directory, not against
    # the directory of this script - confirm this is intended.
    path = os.path.join('..', path_in_ndk)
    if is_executable_available(path, test_option):
        return path

    # 3. Find tool in the default ndk installation path.
    home = os.environ.get('HOMEPATH') if is_windows() else os.environ.get('HOME')
    if home:
        default_ndk_path = os.path.join(home, DEFAULT_NDK_PATH[platform].replace('/', os.sep))
        if os.path.isdir(default_ndk_path):
            # Android Studio can install multiple ndk versions. Try them from the newest
            # to the oldest, comparing version numbers numerically instead of as strings.
            ndk_versions = sorted(os.listdir(default_ndk_path),
                                  key=_ndk_version_sort_key, reverse=True)
            for ndk_version in ndk_versions:
                path = os.path.join(default_ndk_path, ndk_version, path_in_ndk)
                if is_executable_available(path, test_option):
                    return path

    # 4. Find tool in $PATH.
    if is_executable_available(toolname_with_arch, test_option):
        return toolname_with_arch

    # 5. Find tool without arch in $PATH.
    if is_binutils and tool_info.get('accept_tool_without_arch'):
        if is_executable_available(toolname, test_option):
            return toolname
    return None
235
236
class AdbHelper(object):
    """ A wrapper of the adb command line tool.

        adb_path: path of the adb executable, located with find_tool_path().
        enable_switch_to_root: whether switch_to_root() is allowed to run adb as root.
    """
    def __init__(self, enable_switch_to_root=True):
        adb_path = find_tool_path('adb')
        if not adb_path:
            log_exit("Can't find adb in PATH environment.")
        self.adb_path = adb_path
        self.enable_switch_to_root = enable_switch_to_root


    def run(self, adb_args):
        """ Run an adb command. Return True if it exits with status 0. """
        return self.run_and_return_output(adb_args)[0]


    def run_and_return_output(self, adb_args, log_output=True, log_stderr=True):
        """ Run an adb command and capture its output.

            adb_args: list of arguments following 'adb' on the command line.
            log_output: log stdout at debug level (never done for push/pull commands).
            log_stderr: log stderr at warning level.
            Return (result, stdout_data), where result is True if adb exits with status 0.
        """
        adb_args = [self.adb_path] + adb_args
        log_debug('run adb cmd: %s' % adb_args)
        subproc = subprocess.Popen(adb_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout_data, stderr_data = subproc.communicate()
        stdout_data = bytes_to_str(stdout_data)
        stderr_data = bytes_to_str(stderr_data)
        returncode = subproc.returncode
        result = (returncode == 0)
        # adb_args[0] is the adb path now, so adb_args[1] is the adb sub command.
        if log_output and stdout_data and adb_args[1] != 'push' and adb_args[1] != 'pull':
            log_debug(stdout_data)
        if log_stderr and stderr_data:
            log_warning(stderr_data)
        log_debug('run adb cmd: %s  [result %s]' % (adb_args, result))
        return (result, stdout_data)

    def check_run(self, adb_args):
        """ Run an adb command, exiting the script if it fails. """
        self.check_run_and_return_output(adb_args)


    def check_run_and_return_output(self, adb_args, stdout_file=None, log_output=True):
        """ Run an adb command and return its stdout, exiting the script if it fails.

            stdout_file: unused, kept so existing callers passing it keep working.
            log_output: log stdout at debug level.
        """
        # Pass log_output by keyword. Forwarding stdout_file and log_output positionally
        # would bind them to the log_output and log_stderr parameters instead.
        result, stdoutdata = self.run_and_return_output(adb_args, log_output=log_output)
        if not result:
            log_exit('run "adb %s" failed' % adb_args)
        return stdoutdata


    def _unroot(self):
        """ Restart adbd without root permission, if the shell currently runs as root. """
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        if not result:
            return
        if 'root' not in stdoutdata:
            return
        log_info('unroot adb')
        self.run(['unroot'])
        self.run(['wait-for-device'])
        time.sleep(1)


    def switch_to_root(self):
        """ Try to run adb as root. Return True if the adb shell runs as root afterwards.

            When enable_switch_to_root is False, adb is unrooted instead and False is
            returned. On user builds no attempt to switch is made.
        """
        if not self.enable_switch_to_root:
            self._unroot()
            return False
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        if not result:
            return False
        if 'root' in stdoutdata:
            return True
        build_type = self.get_property('ro.build.type')
        # getprop output ends with a newline, so strip it before comparing.
        if build_type and build_type.strip() == 'user':
            return False
        self.run(['root'])
        time.sleep(1)
        self.run(['wait-for-device'])
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        return result and 'root' in stdoutdata

    def get_property(self, name):
        """ Return the raw output of `getprop name` (not stripped), or None on failure. """
        result, stdoutdata = self.run_and_return_output(['shell', 'getprop', name])
        return stdoutdata if result else None

    def set_property(self, name, value):
        """ Run `setprop name value` on device. Return True on success. """
        return self.run(['shell', 'setprop', name, value])


    def get_device_arch(self):
        """ Return the device arch ('arm64', 'arm', 'x86_64' or 'x86') based on `uname -m`.
            An unrecognized arch is reported through log_fatal().
        """
        output = self.check_run_and_return_output(['shell', 'uname', '-m'])
        # Order matters: 'x86_64' must be tested before the shorter '86'.
        if 'aarch64' in output:
            return 'arm64'
        if 'arm' in output:
            return 'arm'
        if 'x86_64' in output:
            return 'x86_64'
        if '86' in output:
            return 'x86'
        log_fatal('unsupported architecture: %s' % output.strip())
        return ''


    def get_android_version(self):
        """ Get Android version on device, like 7 is for Android N, 8 is for Android O.
            Return 0 if the version can't be determined.
        """
        build_version = self.get_property('ro.build.version.release')
        android_version = 0
        if build_version:
            if not build_version[0].isdigit():
                # A release named by a letter: 'L' maps to 5, 'M' to 6, and so on.
                c = build_version[0].upper()
                if c.isupper() and c >= 'L':
                    android_version = ord(c) - ord('L') + 5
            else:
                strs = build_version.split('.')
                if strs:
                    android_version = int(strs[0])
        return android_version
343
344
def flatten_arg_list(arg_list):
    """ Concatenate a list of lists into one flat list. None or empty input gives []. """
    flattened = []
    for items in arg_list or ():
        flattened.extend(items)
    return flattened
351
352
def remove(dir_or_file):
    """ Delete a file, or a directory tree (ignoring errors). A missing path is a no-op. """
    if os.path.isfile(dir_or_file):
        os.remove(dir_or_file)
        return
    if os.path.isdir(dir_or_file):
        shutil.rmtree(dir_or_file, ignore_errors=True)
358
359
def open_report_in_browser(report_path):
    """ Show a report file in a web browser.
        On darwin the `open` command is tried first. Otherwise Chrome is preferred, and
        the default browser is used when Chrome isn't available.
    """
    if is_darwin():
        # On darwin 10.12.6, webbrowser can't open browser, so try `open` cmd first.
        try:
            subprocess.check_call(['open', report_path])
        except subprocess.CalledProcessError:
            pass
        else:
            return
    import webbrowser
    try:
        # Try to open the report with Chrome
        chrome = webbrowser.get('google-chrome')
        chrome.open(report_path, new=0, autoraise=True)
    except webbrowser.Error:
        # webbrowser.get() doesn't work well on darwin/windows.
        webbrowser.open_new_tab(report_path)
376
def is_elf_file(path):
    """ Return True if path is a regular file starting with the ELF magic number. """
    if not os.path.isfile(path):
        return False
    with open(path, 'rb') as candidate:
        magic = candidate.read(4)
    return magic == b'\x7fELF'
382
def find_real_dso_path(dso_path_in_record_file, binary_cache_path):
    """ Given the path of a shared library in perf.data, find its real path in the file system.
        The copy under binary_cache_path is preferred over the recorded path itself.
        Return None if neither is an elf file.
    """
    candidates = []
    if binary_cache_path:
        # Drop the first character (the leading '/') so the path can be joined to the cache dir.
        candidates.append(os.path.join(binary_cache_path, dso_path_in_record_file[1:]))
    candidates.append(dso_path_in_record_file)
    for candidate in candidates:
        if is_elf_file(candidate):
            return candidate
    return None
392
393
class Addr2Nearestline(object):
    """ Use llvm-symbolizer to convert (dso_path, func_addr, addr) to (source_file, line).
        For instructions generated by C++ compilers without a matching statement in source code
        (like stack corruption check, switch optimization, etc.), the symbolizer can't generate
        line information. However, we want to assign the instruction to the nearest line before
        the instruction (just like objdump -dl). So we use below strategy:
        Instead of finding the exact line of the instruction in an address, we find the nearest
        line to the instruction in an address. If an address doesn't have a line info, we find
        the line info of address - 1. If still no line info, then use address - 2, address - 3,
        etc.

        The implementation steps are as below:
        1. Collect all (dso_path, func_addr, addr) requests before converting. This saves the
        times to call the symbolizer.
        2. Convert addrs to (source_file, line) pairs for each dso_path as below:
          2.1 Check if the dso_path has .debug_line. If not, omit its conversion.
          2.2 Get arch of the dso_path, and decide the addr_step for it. addr_step is the step we
          change addr each time. For example, since instructions of arm64 are all 4 bytes long,
          addr_step for arm64 can be 4. (The code uses 2 for arm and 1 for other archs.)
          2.3 Use the symbolizer to find line info for each addr in the dso_path.
          2.4 For each addr without line info, use the symbolizer to find line info for
              range(addr - addr_step, addr - addr_step * 4 - 1, -addr_step).
          2.5 For each addr without line info, use the symbolizer to find line info for
              range(addr - addr_step * 5, addr - addr_step * 128 - 1, -addr_step).
              (128 is a guess number. A nested switch statement in
               system/core/demangle/Demangler.cpp has >300 bytes without line info in arm64.)
        Shifted addresses never go below func_addr, so a line of another function isn't used.
    """
    class Dso(object):
        """ Info of a dynamic shared library.
            addrs: a map from address to Addr object in this dso.
        """
        def __init__(self):
            self.addrs = {}

    class Addr(object):
        """ Info of an addr request.
            func_addr: start_addr of the function containing addr.
            source_lines: None until line info is found, then a list of
                          (file_id, line_number) tuples for addr, or
                          (file_id, line_number, func_id) tuples when function names
                          are requested.
                          source_lines[:-1] are all for inlined functions.
        """
        def __init__(self, func_addr):
            self.func_addr = func_addr
            self.source_lines = None

    def __init__(self, ndk_path, binary_cache_path, with_function_name):
        """ ndk_path: ndk directory used to find llvm-symbolizer and readelf, can be None.
            binary_cache_path: directory searched first for the binaries, can be None.
            with_function_name: whether to also collect a function name for each source line.
            Exits the script if llvm-symbolizer can't be found.
        """
        self.symbolizer_path = find_tool_path('llvm-symbolizer', ndk_path)
        if not self.symbolizer_path:
            log_exit("Can't find llvm-symbolizer. Please set ndk path with --ndk_path option.")
        self.readelf = ReadElf(ndk_path)
        self.dso_map = {}  # map from dso_path to Dso.
        self.binary_cache_path = binary_cache_path
        self.with_function_name = with_function_name
        # Saving file names for each addr takes a lot of memory. So we store file ids in Addr,
        # and provide data structures connecting file id and file name here.
        self.file_name_to_id = {}
        self.file_id_to_name = []
        # Function names are stored by id in the same way.
        self.func_name_to_id = {}
        self.func_id_to_name = []

    def add_addr(self, dso_path, func_addr, addr):
        """ Register a request for addr in dso_path. func_addr is the start address of the
            function containing addr. Adding the same addr again keeps the first request.
        """
        dso = self.dso_map.get(dso_path)
        if dso is None:
            dso = self.dso_map[dso_path] = self.Dso()
        if addr not in dso.addrs:
            dso.addrs[addr] = self.Addr(func_addr)

    def convert_addrs_to_lines(self):
        """ Find line info for all requests added by add_addr(). """
        for dso_path in self.dso_map:
            self._convert_addrs_in_one_dso(dso_path, self.dso_map[dso_path])

    def _convert_addrs_in_one_dso(self, dso_path, dso):
        """ Find line info for the requests of one dso. Does nothing if the binary can't be
            found or has no .debug_line section.
        """
        real_path = find_real_dso_path(dso_path, self.binary_cache_path)
        if not real_path:
            # These names aren't files, so no debug message is logged for them.
            if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']:
                log_debug("Can't find dso %s" % dso_path)
            return

        if not self._check_debug_line_section(real_path):
            log_debug("file %s doesn't contain .debug_line section." % real_path)
            return

        addr_step = self._get_addr_step(real_path)
        # Three passes with growing distance: the exact addr, then 1 to 4 steps back,
        # then 5 to 128 steps back. Each pass only handles addrs still without line info.
        self._collect_line_info(dso, real_path, [0])
        self._collect_line_info(dso, real_path, range(-addr_step, -addr_step * 4 - 1, -addr_step))
        self._collect_line_info(dso, real_path,
                                range(-addr_step * 5, -addr_step * 128 - 1, -addr_step))

    def _check_debug_line_section(self, real_path):
        """ Return True if the elf file has a .debug_line section. """
        return '.debug_line' in self.readelf.get_sections(real_path)

    def _get_addr_step(self, real_path):
        """ Return the step used when moving an addr backwards: 4 for arm64, 2 for arm,
            1 for everything else.
        """
        arch = self.readelf.get_arch(real_path)
        if arch == 'arm64':
            return 4
        if arch == 'arm':
            return 2
        return 1

    def _collect_line_info(self, dso, real_path, addr_shifts):
        """ Use llvm-symbolizer to get line info in a dso, with given addr shifts.
            dso: the Dso whose addrs without source_lines are to be resolved.
            real_path: path of the binary in the file system.
            addr_shifts: offsets added to each addr, ordered from nearest to farthest.
            The first shifted addr having line info decides the source_lines of an addr.
        """
        # 1. Collect addrs to send to the symbolizer.
        addr_set = set()
        for addr in dso.addrs:
            addr_obj = dso.addrs[addr]
            if addr_obj.source_lines:  # already has source line, no need to search.
                continue
            for shift in addr_shifts:
                # The addr after shift shouldn't change to another function.
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                addr_set.add(shifted_addr)
                if shifted_addr == addr_obj.func_addr:
                    break
        if not addr_set:
            return
        addr_request = '\n'.join(['0x%x' % addr for addr in sorted(addr_set)])

        # 2. Use the symbolizer to collect line info.
        try:
            subproc = subprocess.Popen(self._build_symbolizer_args(real_path),
                                       stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            (stdoutdata, _) = subproc.communicate(str_to_bytes(addr_request))
            stdoutdata = bytes_to_str(stdoutdata)
        except OSError:
            return
        # The output is parsed as below:
        #   - a line starting with '0x' begins the records of a new address,
        #   - with function names enabled, records alternate between a function name line
        #     and a source location line; otherwise each record is one source location line.
        addr_map = {}
        cur_line_list = None
        need_function_name = self.with_function_name
        cur_function_name = None
        for line in stdoutdata.strip().split('\n'):
            line = line.strip()
            if not line:
                continue
            if line[:2] == '0x':
                # a new address
                cur_line_list = addr_map[int(line, 16)] = []
            elif need_function_name:
                cur_function_name = line.strip()
                need_function_name = False
            else:
                need_function_name = self.with_function_name
                if cur_line_list is None:
                    # The records of the current address were already rejected.
                    continue
                file_path, line_number = self._parse_source_location(line)
                # A line_number of 0 is treated as missing too.
                if not file_path or not line_number:
                    # An addr can have a list of (file, line), when the addr belongs to an inlined
                    # function. Sometimes only part of the list has ? mark. In this case, we think
                    # the line info is valid if the first line doesn't have ? mark.
                    if not cur_line_list:
                        cur_line_list = None
                    continue
                file_id = self._get_file_id(file_path)
                if self.with_function_name:
                    func_id = self._get_func_id(cur_function_name)
                    cur_line_list.append((file_id, line_number, func_id))
                else:
                    cur_line_list.append((file_id, line_number))

        # 3. Fill line info in dso.addrs.
        for addr in dso.addrs:
            addr_obj = dso.addrs[addr]
            if addr_obj.source_lines:
                continue
            for shift in addr_shifts:
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                lines = addr_map.get(shifted_addr)
                # An empty list means no valid line info was parsed for shifted_addr.
                if lines:
                    addr_obj.source_lines = lines
                    break
                if shifted_addr == addr_obj.func_addr:
                    break

    def _build_symbolizer_args(self, binary_path):
        """ Return the llvm-symbolizer command line for binary_path. Addresses are read from
            stdin by the caller.
        """
        args = [self.symbolizer_path, '-print-address', '-inlining', '-obj=%s' % binary_path]
        if self.with_function_name:
            args += ['-functions=linkage', '-demangle']
        else:
            args.append('-functions=none')
        return args

    def _parse_source_location(self, line):
        """ Parse a "filename:line:column" string.
            Return (file_path, line_number as int), or (None, None) if the line doesn't have
            that format, contains a ? mark, or the line number isn't an integer.
        """
        file_path, line_number = None, None
        # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25".
        # Filename may contain ':' like "C:\Users\...\file".
        items = line.rsplit(':', 2)
        if len(items) == 3:
            file_path, line_number = items[:2]
        if not file_path or ('?' in file_path) or not line_number or ('?' in line_number):
            return None, None
        try:
            line_number = int(line_number)
        except ValueError:
            return None, None
        return file_path, line_number

    def _get_file_id(self, file_path):
        """ Return the id of file_path, assigning the next free id on first use. """
        file_id = self.file_name_to_id.get(file_path)
        if file_id is None:
            file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name)
            self.file_id_to_name.append(file_path)
        return file_id

    def _get_func_id(self, func_name):
        """ Return the id of func_name, assigning the next free id on first use. """
        func_id = self.func_name_to_id.get(func_name)
        if func_id is None:
            func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name)
            self.func_id_to_name.append(func_name)
        return func_id

    def get_dso(self, dso_path):
        """ Return the Dso registered for dso_path, or None. """
        return self.dso_map.get(dso_path)

    def get_addr_source(self, dso, addr):
        """ Return the line info found for addr in dso, or None if there is none.
            The result is a list of (file_name, line) tuples, or
            (file_name, line, function_name) tuples when function names are requested.
            addr must have been added to dso by add_addr().
        """
        source = dso.addrs[addr].source_lines
        if source is None:
            return None
        if self.with_function_name:
            return [(self.file_id_to_name[file_id], line, self.func_id_to_name[func_id])
                    for (file_id, line, func_id) in source]
        return [(self.file_id_to_name[file_id], line) for (file_id, line) in source]
613
614
class SourceFileSearcher(object):
    """ Map source file paths stored in debug info to paths in the file system.
        The paths reported by the symbolizer come from the debug sections of shared
        libraries, and usually differ from where the sources live locally. The
        mapping works as below:
        1. Walk the provided source_dirs and remember every file whose extension is
           in SOURCE_FILE_EXTS (C/C++ headers and sources, Java and Kotlin sources).
        2. Given an abstract_path from debug info, consider all remembered files with
           the same file name, and select the one whose directory has the longest
           common suffix with the directory of the abstract path.
    """

    SOURCE_FILE_EXTS = {'.h', '.hh', '.H', '.hxx', '.hpp', '.h++',
                        '.c', '.cc', '.C', '.cxx', '.cpp', '.c++',
                        '.java', '.kt'}

    @classmethod
    def is_source_filename(cls, filename):
        """ Return True if filename has one of the known source file extensions. """
        _, extension = os.path.splitext(filename)
        return extension in cls.SOURCE_FILE_EXTS

    def __init__(self, source_dirs):
        # Map from filename to a list of reversed directory path containing filename.
        # Directories are stored reversed, so a common suffix becomes a common prefix.
        self.filename_to_rparents = {}
        self._collect_paths(source_dirs)

    def _collect_paths(self, source_dirs):
        """ Record the reversed parent directory of every source file under source_dirs. """
        for source_dir in source_dirs:
            for parent, _, file_names in os.walk(source_dir):
                source_names = [name for name in file_names if self.is_source_filename(name)]
                if not source_names:
                    continue
                reversed_parent = parent[::-1]
                for name in source_names:
                    self.filename_to_rparents.setdefault(name, []).append(reversed_parent)

    def get_real_path(self, abstract_path):
        """ Return the best matching file system path for abstract_path, or None if no
            collected file has the same file name.
        """
        normalized_path = abstract_path.replace('/', os.sep)
        abstract_parent, file_name = os.path.split(normalized_path)
        candidates = self.filename_to_rparents.get(file_name)
        if not candidates:
            return None
        abstract_rparent = abstract_parent[::-1]

        def common_length(real_rparent):
            return len(os.path.commonprefix((real_rparent, abstract_rparent)))

        # max() keeps the first candidate among those with equal common length.
        best_rparent = max(candidates, key=common_length)
        return os.path.join(best_rparent[::-1], file_name)
676
677
class Objdump(object):
    """ Disassemble code of binaries by running objdump. """
    def __init__(self, ndk_path, binary_cache_path):
        self.ndk_path = ndk_path
        self.binary_cache_path = binary_cache_path
        self.readelf = ReadElf(ndk_path)
        # Map from arch to the objdump path found for it.
        self.objdump_paths = {}

    def get_dso_info(self, dso_path):
        """ Return (real_path, arch) of a dso, or None if the binary can't be found or its
            arch is unknown.
        """
        real_path = find_real_dso_path(dso_path, self.binary_cache_path)
        if real_path:
            arch = self.readelf.get_arch(real_path)
            if arch != 'unknown':
                return (real_path, arch)
        return None

    def _get_objdump_path(self, arch):
        """ Return the objdump for arch, finding it on first use. Exits the script if
            no objdump can be found.
        """
        objdump_path = self.objdump_paths.get(arch)
        if not objdump_path:
            objdump_path = find_tool_path('objdump', self.ndk_path, arch)
            if not objdump_path:
                log_exit("Can't find objdump. Please set ndk path with --ndk_path option.")
            self.objdump_paths[arch] = objdump_path
        return objdump_path

    @staticmethod
    def _get_line_addr(line):
        """ Return the hex address before the first ':' of an output line, or 0 if the
            line doesn't start with one.
        """
        try:
            return int(line.split(':', 1)[0], 16)
        except ValueError:
            return 0

    def disassemble_code(self, dso_info, start_addr, addr_len):
        """ Disassemble [start_addr, start_addr + addr_len] of dso_path.
            dso_info is the (real_path, arch) pair returned by get_dso_info().
            Return a list of pair (disassemble_code_line, addr), or None if objdump
            can't be run or prints nothing.
        """
        real_path, arch = dso_info
        command = [self._get_objdump_path(arch), '-dlC', '--no-show-raw-insn',
                   '--start-address=0x%x' % start_addr,
                   '--stop-address=0x%x' % (start_addr + addr_len),
                   real_path]
        try:
            proc = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            output = bytes_to_str(proc.communicate()[0])
        except OSError:
            return None
        if not output:
            return None

        # rstrip() removes '\r' on Windows.
        code_lines = [raw_line.rstrip() for raw_line in output.split('\n')]
        return [(code_line, self._get_line_addr(code_line)) for code_line in code_lines]
731
732
class ReadElf(object):
    """ Query properties of elf files by running readelf. """
    def __init__(self, ndk_path):
        self.readelf_path = find_tool_path('readelf', ndk_path)
        if not self.readelf_path:
            log_exit("Can't find readelf. Please set ndk path with --ndk_path option.")

    def _run_readelf(self, option, elf_file_path):
        """ Return the output of `readelf option elf_file_path` as str, or None if the
            file isn't an elf file or readelf exits with an error.
        """
        if not is_elf_file(elf_file_path):
            return None
        try:
            raw_output = subprocess.check_output([self.readelf_path, option, elf_file_path])
        except subprocess.CalledProcessError:
            return None
        return bytes_to_str(raw_output)

    def get_arch(self, elf_file_path):
        """ Get arch of an elf file: 'arm64', 'arm', 'x86_64', 'x86' or 'unknown'. """
        header = self._run_readelf('-h', elf_file_path)
        if header is not None:
            # The first marker found in the elf header output decides the arch.
            markers = (('AArch64', 'arm64'), ('ARM', 'arm'), ('X86-64', 'x86_64'),
                       ('80386', 'x86'))
            for marker, arch in markers:
                if marker in header:
                    return arch
        return 'unknown'

    def get_build_id(self, elf_file_path, with_padding=True):
        """ Get build id of an elf file, padded by pad_build_id() if with_padding is set.
            Return "" if the build id can't be read.
        """
        notes = self._run_readelf('-n', elf_file_path)
        found = re.search(r'Build ID:\s*(\S+)', notes) if notes is not None else None
        if not found:
            return ""
        build_id = found.group(1)
        return self.pad_build_id(build_id) if with_padding else build_id

    @staticmethod
    def pad_build_id(build_id):
        """ Pad build id to 40 hex numbers (20 bytes), truncating longer ones, and
            add a '0x' prefix.
        """
        return '0x' + build_id[:40].ljust(40, '0')

    def get_sections(self, elf_file_path):
        """ Get the list of section names of an elf file, empty if they can't be read. """
        section_names = []
        listing = self._run_readelf('-SW', elf_file_path)
        if listing is None:
            return section_names
        for line in listing.split('\n'):
            # Parse line like:" [ 1] .note.android.ident NOTE  0000000000400190 ...".
            found = re.search(r'^\s+\[\s*\d+\]\s(.+?)\s', line)
            if not found:
                continue
            section_name = found.group(1).strip()
            if section_name:
                section_names.append(section_name)
        return section_names
800
def extant_dir(arg):
    """ArgumentParser type that only accepts existing directories.

    Args:
        arg: The string argument given on the command line.
    Returns: The real path of the argument.
    Raises:
        argparse.ArgumentTypeError: The given path isn't a directory.
    """
    resolved = os.path.realpath(arg)
    if os.path.isdir(resolved):
        return resolved
    raise argparse.ArgumentTypeError('{} is not a directory.'.format(resolved))
814
def extant_file(arg):
    """ArgumentParser type that only accepts existing files.

    Args:
        arg: The string argument given on the command line.
    Returns: The real path of the argument.
    Raises:
        argparse.ArgumentTypeError: The given path isn't a file.
    """
    resolved = os.path.realpath(arg)
    if os.path.isfile(resolved):
        return resolved
    raise argparse.ArgumentTypeError('{} is not a file.'.format(resolved))
828
# Set the root logger level to DEBUG when this module is loaded. It can be changed later
# with set_log_level() or disable_debug_log().
logging.getLogger().setLevel(logging.DEBUG)
830