#!/usr/bin/env python
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""utils.py: export utility functions.
"""

from __future__ import print_function
import argparse
import logging
import os
import os.path
import re
import shutil
import subprocess
import sys
import time


def get_script_dir():
    """ Return the directory containing this script, with symlinks resolved. """
    return os.path.dirname(os.path.realpath(__file__))


def is_windows():
    """ Return True when running on Windows (native or cygwin python). """
    return sys.platform in ('win32', 'cygwin')


def is_darwin():
    """ Return True when running on macOS. """
    return sys.platform == 'darwin'


def get_platform():
    """ Return the host platform name: 'windows', 'darwin' or 'linux'. """
    if is_windows():
        return 'windows'
    if is_darwin():
        return 'darwin'
    return 'linux'


def is_python3():
    """ Return True when the interpreter is python 3 or newer. """
    return sys.version_info >= (3, 0)


def log_debug(msg):
    """ Log msg at DEBUG level on the root logger. """
    logging.debug(msg)


def log_info(msg):
    """ Log msg at INFO level on the root logger. """
    logging.info(msg)


def log_warning(msg):
    """ Log msg at WARNING level on the root logger. """
    logging.warning(msg)


def log_fatal(msg):
    """ Report an unrecoverable error by raising Exception(msg). """
    raise Exception(msg)


def log_exit(msg):
    """ Terminate the program via sys.exit(msg). """
    sys.exit(msg)


def disable_debug_log():
    """ Raise the root logger threshold to WARNING. """
    logging.getLogger().setLevel(logging.WARNING)


# Level names accepted by set_log_level().
_LOG_LEVELS = {
    'debug': logging.DEBUG,
    'info': logging.INFO,
    'warning': logging.WARNING,
}


def set_log_level(level_name):
    """ Set the root logger level from 'debug', 'info' or 'warning'.
        Any other name is reported through log_fatal(), which raises.
    """
    level = _LOG_LEVELS.get(level_name)
    if level is None:
        log_fatal('unknown log level: %s' % level_name)
    logging.getLogger().setLevel(level)


def str_to_bytes(str_value):
    """ Convert a str to the 8 bit string type expected by the C api. """
    if not is_python3():
        return str_value
    # In python 3, str are wide strings whereas the C api expects 8 bit strings,
    # hence we have to convert. For now using utf-8 as the encoding.
    return str_value.encode('utf-8')


def bytes_to_str(bytes_value):
    """ Convert bytes to str (utf-8 in python 3). Empty or None input gives ''. """
    if not bytes_value:
        return ''
    if not is_python3():
        return bytes_value
    return bytes_value.decode('utf-8')


def get_target_binary_path(arch, binary_name):
    """ Return the path of a prebuilt device binary: <script_dir>/bin/android/<arch>/<name>.
        'aarch64' is accepted as an alias of 'arm64'. A missing arch directory or binary
        is reported through log_fatal(), which raises.
    """
    if arch == 'aarch64':
        arch = 'arm64'
    arch_dir = os.path.join(get_script_dir(), "bin", "android", arch)
    if not os.path.isdir(arch_dir):
        log_fatal("can't find arch directory: %s" % arch_dir)
    binary_path = os.path.join(arch_dir, binary_name)
    if not os.path.isfile(binary_path):
        log_fatal("can't find binary: %s" % binary_path)
    return binary_path


def get_host_binary_path(binary_name):
    """ Return the path of a prebuilt host binary under <script_dir>/bin/<platform>/<bits>.
        binary_name is given in linux form; '.so' is mapped to '.dll' on windows and
        '.dylib' on darwin, and names without an extension get '.exe' on windows.
        A missing binary is reported through log_fatal(), which raises.
    """
    dirname = os.path.join(get_script_dir(), 'bin')
    if is_windows():
        if binary_name.endswith('.so'):
            binary_name = binary_name[0:-3] + '.dll'
        elif '.' not in binary_name:
            binary_name += '.exe'
        dirname = os.path.join(dirname, 'windows')
    elif is_darwin():
        if binary_name.endswith('.so'):
            binary_name = binary_name[0:-3] + '.dylib'
        dirname = os.path.join(dirname, 'darwin')
    else:
        dirname = os.path.join(dirname, 'linux')
    # Pick the directory matching the word size of the running interpreter.
    dirname = os.path.join(dirname, 'x86_64' if sys.maxsize > 2 ** 32 else 'x86')
    binary_path = os.path.join(dirname, binary_name)
    if not os.path.isfile(binary_path):
        log_fatal("can't find binary: %s" % binary_path)
    return binary_path


def is_executable_available(executable, option='--help'):
    """ Run an executable to see if it exists.
        Return True only if `executable option` can be started and exits with status 0.
    """
    try:
        subproc = subprocess.Popen([executable, option], stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        subproc.communicate()
        return subproc.returncode == 0
    except OSError:
        return False


# Default ndk installation directory for each platform, relative to the home directory.
DEFAULT_NDK_PATH = {
    'darwin': 'Library/Android/sdk/ndk',
    'linux': 'Android/Sdk/ndk',
    'windows': 'AppData/Local/Android/sdk/ndk',
}

# Tools that find_tool_path() knows how to locate.
#   is_binutils: the tool name is prefixed with an arch specific triple in the ndk.
#   test_option: option used to test if the tool is runnable (default '--help').
#   path_in_ndk: function mapping a platform name to the tool path relative to the ndk.
#   accept_tool_without_arch: allow falling back to the unprefixed tool in $PATH.
EXPECTED_TOOLS = {
    'adb': {
        'is_binutils': False,
        'test_option': 'version',
        'path_in_ndk': lambda _: '../platform-tools/adb',
    },
    'readelf': {
        'is_binutils': True,
        'accept_tool_without_arch': True,
    },
    'llvm-symbolizer': {
        'is_binutils': False,
        'path_in_ndk':
            lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform,
    },
    'objdump': {
        'is_binutils': True,
    },
    'strip': {
        'is_binutils': True,
    },
}

# Map from arch to the prefix of binutils tool names in the ndk.
_BINUTILS_PREFIXES = {
    'arm64': 'aarch64-linux-android-',
    'arm': 'arm-linux-androideabi-',
    'x86_64': 'x86_64-linux-android-',
    'x86': 'i686-linux-android-',
}


def _get_binutils_path_in_ndk(toolname, arch, platform):
    """ Return (tool name with arch prefix, tool path relative to the ndk).
        arch defaults to 'arm64'; an unknown arch is reported through log_fatal().
    """
    if not arch:
        arch = 'arm64'
    prefix = _BINUTILS_PREFIXES.get(arch)
    if prefix is None:
        log_fatal('unexpected arch %s' % arch)
    name = prefix + toolname
    path = 'toolchains/llvm/prebuilt/%s-x86_64/bin/%s' % (platform, name)
    return (name, path)


def _ndk_version_key(name):
    """ Sort key ordering ndk directory names by their numeric components, so that
        '21.0.1' is newer than '9.0.2'. Names without digits sort first.
    """
    return ([int(part) for part in re.findall(r'\d+', name)], name)


def find_tool_path(toolname, ndk_path=None, arch=None):
    """ Find a runnable copy of one of EXPECTED_TOOLS.
        toolname: a key of EXPECTED_TOOLS.
        ndk_path: optional ndk directory to search first.
        arch: target arch for binutils tools ('arm64' if not given).
        Return a path (or bare command name found in $PATH) that passed
        is_executable_available(), or None if the tool is unknown or can't be found.
    """
    if toolname not in EXPECTED_TOOLS:
        return None
    tool_info = EXPECTED_TOOLS[toolname]
    is_binutils = tool_info['is_binutils']
    test_option = tool_info.get('test_option', '--help')
    platform = get_platform()
    if is_binutils:
        toolname_with_arch, path_in_ndk = _get_binutils_path_in_ndk(toolname, arch, platform)
    else:
        toolname_with_arch = toolname
        path_in_ndk = tool_info['path_in_ndk'](platform)
    path_in_ndk = path_in_ndk.replace('/', os.sep)

    # 1. Find tool in the given ndk path.
    if ndk_path:
        path = os.path.join(ndk_path, path_in_ndk)
        if is_executable_available(path, test_option):
            return path

    # 2. Find tool in the ndk directory containing simpleperf scripts.
    path = os.path.join('..', path_in_ndk)
    if is_executable_available(path, test_option):
        return path

    # 3. Find tool in the default ndk installation path.
    home = os.environ.get('HOMEPATH') if is_windows() else os.environ.get('HOME')
    if home:
        default_ndk_path = os.path.join(home, DEFAULT_NDK_PATH[platform].replace('/', os.sep))
        if os.path.isdir(default_ndk_path):
            # Android Studio can install multiple ndk versions. Find the newest one.
            # Versions are compared numerically, as plain string comparison would
            # rank '9.x' above '21.x'.
            ndk_versions = os.listdir(default_ndk_path)
            if ndk_versions:
                ndk_version = max(ndk_versions, key=_ndk_version_key)
                path = os.path.join(default_ndk_path, ndk_version, path_in_ndk)
                if is_executable_available(path, test_option):
                    return path

    # 4. Find tool in $PATH.
    if is_executable_available(toolname_with_arch, test_option):
        return toolname_with_arch

    # 5. Find tool without arch in $PATH.
    if is_binutils and tool_info.get('accept_tool_without_arch'):
        if is_executable_available(toolname, test_option):
            return toolname
    return None


class AdbHelper(object):
    """ A wrapper running adb commands.
        adb_path: adb executable located by find_tool_path().
        enable_switch_to_root: whether switch_to_root() may run adbd as root.
    """
    def __init__(self, enable_switch_to_root=True):
        adb_path = find_tool_path('adb')
        if not adb_path:
            log_exit("Can't find adb in PATH environment.")
        self.adb_path = adb_path
        self.enable_switch_to_root = enable_switch_to_root

    def run(self, adb_args):
        """ Run `adb <adb_args>`. Return True if it exits with status 0. """
        return self.run_and_return_output(adb_args)[0]

    def run_and_return_output(self, adb_args, log_output=True, log_stderr=True):
        """ Run `adb <adb_args>` and capture its output.
            adb_args: list of arguments following the adb executable.
            log_output: log stdout at debug level (never done for push/pull).
            log_stderr: log stderr at warning level.
            Return (True if exit status is 0, stdout as str).
        """
        adb_cmd = [self.adb_path] + adb_args
        log_debug('run adb cmd: %s' % adb_cmd)
        subproc = subprocess.Popen(adb_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout_data, stderr_data = subproc.communicate()
        stdout_data = bytes_to_str(stdout_data)
        stderr_data = bytes_to_str(stderr_data)
        result = (subproc.returncode == 0)
        # Check the caller's first argument; adb_args may be empty.
        is_file_transfer = bool(adb_args) and adb_args[0] in ('push', 'pull')
        if log_output and stdout_data and not is_file_transfer:
            log_debug(stdout_data)
        if log_stderr and stderr_data:
            log_warning(stderr_data)
        log_debug('run adb cmd: %s  [result %s]' % (adb_cmd, result))
        return (result, stdout_data)

    def check_run(self, adb_args):
        """ Run `adb <adb_args>`, exiting the program if it fails. """
        self.check_run_and_return_output(adb_args)

    def check_run_and_return_output(self, adb_args, stdout_file=None, log_output=True):
        """ Run `adb <adb_args>` and return its stdout, exiting the program if it fails.
            stdout_file: unused, kept so that existing callers keep working.
            log_output: log stdout at debug level.
        """
        # Pass log_output by keyword: run_and_return_output() has no stdout_file
        # parameter, so positional forwarding would shift the arguments.
        result, stdoutdata = self.run_and_return_output(adb_args, log_output=log_output)
        if not result:
            log_exit('run "adb %s" failed' % adb_args)
        return stdoutdata

    def _unroot(self):
        """ Run `adb unroot` if the adb shell currently runs as root. """
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        if not result:
            return
        if 'root' not in stdoutdata:
            return
        log_info('unroot adb')
        self.run(['unroot'])
        self.run(['wait-for-device'])
        time.sleep(1)

    def switch_to_root(self):
        """ Try to make the adb shell run as root.
            Return True if the shell runs as root afterwards. When switching is disabled,
            the shell is unrooted instead and False is returned. Devices whose
            ro.build.type is 'user' are not switched.
        """
        if not self.enable_switch_to_root:
            self._unroot()
            return False
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        if not result:
            return False
        if 'root' in stdoutdata:
            return True
        build_type = self.get_property('ro.build.type')
        if build_type == 'user':
            return False
        self.run(['root'])
        time.sleep(1)
        self.run(['wait-for-device'])
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        return result and 'root' in stdoutdata

    def get_property(self, name):
        """ Return the stdout of `getprop <name>` on device, or None if the command fails. """
        result, stdoutdata = self.run_and_return_output(['shell', 'getprop', name])
        return stdoutdata if result else None

    def set_property(self, name, value):
        """ Run `setprop <name> <value>` on device. Return True on success. """
        return self.run(['shell', 'setprop', name, value])

    def get_device_arch(self):
        """ Return the device arch ('arm64', 'arm', 'x86_64' or 'x86') based on `uname -m`.
            An unrecognized arch is reported through log_fatal(), which raises.
        """
        output = self.check_run_and_return_output(['shell', 'uname', '-m'])
        # Order matters: 'x86_64' also contains '86'.
        if 'aarch64' in output:
            return 'arm64'
        if 'arm' in output:
            return 'arm'
        if 'x86_64' in output:
            return 'x86_64'
        if '86' in output:
            return 'x86'
        log_fatal('unsupported architecture: %s' % output.strip())
        return ''

    def get_android_version(self):
        """ Get Android version on device, like 7 is for Android N, 8 is for Android O.
            Return 0 if the version can't be decided.
        """
        build_version = self.get_property('ro.build.version.release')
        if not build_version:
            return 0
        first_char = build_version[0]
        if first_char.isdigit():
            # Only use the leading digits, so versions like '7.1.2' or '11-beta' work.
            match = re.match(r'\d+', build_version)
            return int(match.group()) if match else 0
        # Releases named by a letter: 'L' is Android 5, 'M' is Android 6, etc.
        c = first_char.upper()
        if c.isupper() and c >= 'L':
            return ord(c) - ord('L') + 5
        return 0


def flatten_arg_list(arg_list):
    """ Concatenate a list of lists into one list. None or empty input gives []. """
    res = []
    if arg_list:
        for items in arg_list:
            res += items
    return res


def remove(dir_or_file):
    """ Remove a file, or a directory tree (ignoring errors). Other paths are left alone. """
    if os.path.isfile(dir_or_file):
        os.remove(dir_or_file)
    elif os.path.isdir(dir_or_file):
        shutil.rmtree(dir_or_file, ignore_errors=True)


def open_report_in_browser(report_path):
    """ Open report_path in a web browser, preferring Chrome when it is registered. """
    if is_darwin():
        # On darwin 10.12.6, webbrowser can't open browser, so try `open` cmd first.
        try:
            subprocess.check_call(['open', report_path])
            return
        except subprocess.CalledProcessError:
            pass
    import webbrowser
    try:
        # Try to open the report with Chrome
        browser = webbrowser.get('google-chrome')
        browser.open(report_path, new=0, autoraise=True)
    except webbrowser.Error:
        # webbrowser.get() doesn't work well on darwin/windows.
        webbrowser.open_new_tab(report_path)


def is_elf_file(path):
    """ Return True if path is a regular file starting with the ELF magic number. """
    if os.path.isfile(path):
        with open(path, 'rb') as fh:
            return fh.read(4) == b'\x7fELF'
    return False


def find_real_dso_path(dso_path_in_record_file, binary_cache_path):
    """ Given the path of a shared library in perf.data, find its real path in the file system.
        The copy under binary_cache_path is preferred over the recorded path itself.
        Return None if neither is an elf file.
    """
    if binary_cache_path:
        # Drop the first character of the recorded path, so that it joins below the cache dir.
        tmp_path = os.path.join(binary_cache_path, dso_path_in_record_file[1:])
        if is_elf_file(tmp_path):
            return tmp_path
    if is_elf_file(dso_path_in_record_file):
        return dso_path_in_record_file
    return None


class Addr2Nearestline(object):
    """ Use llvm-symbolizer to convert (dso_path, func_addr, addr) to (source_file, line).
        For instructions generated by C++ compilers without a matching statement in source code
        (like stack corruption check, switch optimization, etc.), the symbolizer can't generate
        line information. However, we want to assign the instruction to the nearest line before
        the instruction (just like objdump -dl). So we use below strategy:
        Instead of finding the exact line of the instruction in an address, we find the nearest
        line to the instruction in an address. If an address doesn't have a line info, we find
        the line info of address - 1. If still no line info, then use address - 2, address - 3,
        etc.

        The implementation steps are as below:
        1. Collect all (dso_path, func_addr, addr) requests before converting. This saves the
        times to call llvm-symbolizer.
        2. Convert addrs to (source_file, line) pairs for each dso_path as below:
          2.1 Check if the dso_path has .debug_line. If not, omit its conversion.
          2.2 Get arch of the dso_path, and decide the addr_step for it. addr_step is the step we
          change addr each time. For example, since instructions of arm64 are all 4 bytes long,
          addr_step for arm64 can be 4.
          2.3 Use llvm-symbolizer to find line info for each addr in the dso_path.
          2.4 For each addr without line info, use llvm-symbolizer to find line info for
          range(addr - addr_step, addr - addr_step * 4 - 1, -addr_step).
          2.5 For each addr without line info, use llvm-symbolizer to find line info for
          range(addr - addr_step * 5, addr - addr_step * 128 - 1, -addr_step).
          (128 is a guess number. A nested switch statement in
           system/core/demangle/Demangler.cpp has >300 bytes without line info in arm64.)
    """
    class Dso(object):
        """ Info of a dynamic shared library.
            addrs: a map from address to Addr object in this dso.
        """
        def __init__(self):
            self.addrs = {}

    class Addr(object):
        """ Info of an addr request.
            func_addr: start_addr of the function containing addr.
            source_lines: a list of [file_id, line_number] for addr.
                          source_lines[:-1] are all for inlined functions.
        """
        def __init__(self, func_addr):
            self.func_addr = func_addr
            self.source_lines = None

    def __init__(self, ndk_path, binary_cache_path, with_function_name):
        """ ndk_path: optional ndk directory used to find llvm-symbolizer and readelf.
            binary_cache_path: directory holding copies of the recorded binaries.
            with_function_name: also collect the function name of each source line.
            Exits the program if llvm-symbolizer can't be found.
        """
        self.symbolizer_path = find_tool_path('llvm-symbolizer', ndk_path)
        if not self.symbolizer_path:
            log_exit("Can't find llvm-symbolizer. Please set ndk path with --ndk_path option.")
        self.readelf = ReadElf(ndk_path)
        self.dso_map = {}  # map from dso_path to Dso.
        self.binary_cache_path = binary_cache_path
        self.with_function_name = with_function_name
        # Saving file names for each addr takes a lot of memory. So we store file ids in Addr,
        # and provide data structures connecting file id and file name here.
        self.file_name_to_id = {}
        self.file_id_to_name = []
        self.func_name_to_id = {}
        self.func_id_to_name = []

    def add_addr(self, dso_path, func_addr, addr):
        """ Register a request for addr, which belongs to the function at func_addr in dso_path. """
        dso = self.dso_map.get(dso_path)
        if dso is None:
            dso = self.dso_map[dso_path] = self.Dso()
        if addr not in dso.addrs:
            dso.addrs[addr] = self.Addr(func_addr)

    def convert_addrs_to_lines(self):
        """ Find line info for all addrs registered through add_addr(). """
        for dso_path in self.dso_map:
            self._convert_addrs_in_one_dso(dso_path, self.dso_map[dso_path])

    def _convert_addrs_in_one_dso(self, dso_path, dso):
        """ Find line info for the addrs of one dso, searching further back in three rounds. """
        real_path = find_real_dso_path(dso_path, self.binary_cache_path)
        if not real_path:
            if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']:
                log_debug("Can't find dso %s" % dso_path)
            return

        if not self._check_debug_line_section(real_path):
            log_debug("file %s doesn't contain .debug_line section." % real_path)
            return

        addr_step = self._get_addr_step(real_path)
        self._collect_line_info(dso, real_path, [0])
        self._collect_line_info(dso, real_path, range(-addr_step, -addr_step * 4 - 1, -addr_step))
        self._collect_line_info(dso, real_path,
                                range(-addr_step * 5, -addr_step * 128 - 1, -addr_step))

    def _check_debug_line_section(self, real_path):
        """ Return True if the elf file has a .debug_line section. """
        return '.debug_line' in self.readelf.get_sections(real_path)

    def _get_addr_step(self, real_path):
        """ Return the step used to walk back addrs: 4 for arm64, 2 for arm, otherwise 1. """
        arch = self.readelf.get_arch(real_path)
        if arch == 'arm64':
            return 4
        if arch == 'arm':
            return 2
        return 1

    def _collect_line_info(self, dso, real_path, addr_shifts):
        """ Use llvm-symbolizer to get line info in a dso, with given addr shifts.
            Only addrs without line info are looked up. Nothing is changed if
            llvm-symbolizer can't be run.
        """
        # 1. Collect addrs to send to llvm-symbolizer.
        addr_set = set()
        for addr in dso.addrs:
            addr_obj = dso.addrs[addr]
            if addr_obj.source_lines:  # already has source line, no need to search.
                continue
            for shift in addr_shifts:
                # The addr after shift shouldn't change to another function.
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                addr_set.add(shifted_addr)
                if shifted_addr == addr_obj.func_addr:
                    break
        if not addr_set:
            return
        addr_request = '\n'.join(['0x%x' % addr for addr in sorted(addr_set)])

        # 2. Use llvm-symbolizer to collect line info.
        try:
            subproc = subprocess.Popen(self._build_symbolizer_args(real_path),
                                       stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            (stdoutdata, _) = subproc.communicate(str_to_bytes(addr_request))
            stdoutdata = bytes_to_str(stdoutdata)
        except OSError:
            return
        # The output for each addr is a line with the addr itself, followed by one source
        # location line per (inlined) function. With function names enabled, each source
        # location line is preceded by a function name line.
        addr_map = {}
        cur_line_list = None
        need_function_name = self.with_function_name
        cur_function_name = None
        for line in stdoutdata.strip().split('\n'):
            line = line.strip()
            if not line:
                continue
            if line[:2] == '0x':
                # a new address
                cur_line_list = addr_map[int(line, 16)] = []
            elif need_function_name:
                cur_function_name = line.strip()
                need_function_name = False
            else:
                need_function_name = self.with_function_name
                if cur_line_list is None:
                    continue
                file_path, line_number = self._parse_source_location(line)
                if not file_path or not line_number:
                    # An addr can have a list of (file, line), when the addr belongs to an inlined
                    # function. Sometimes only part of the list has ? mark. In this case, we think
                    # the line info is valid if the first line doesn't have ? mark.
                    if not cur_line_list:
                        cur_line_list = None
                    continue
                file_id = self._get_file_id(file_path)
                if self.with_function_name:
                    func_id = self._get_func_id(cur_function_name)
                    cur_line_list.append((file_id, line_number, func_id))
                else:
                    cur_line_list.append((file_id, line_number))

        # 3. Fill line info in dso.addrs.
        for addr in dso.addrs:
            addr_obj = dso.addrs[addr]
            if addr_obj.source_lines:
                continue
            for shift in addr_shifts:
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                lines = addr_map.get(shifted_addr)
                if lines:
                    addr_obj.source_lines = lines
                    break
                if shifted_addr == addr_obj.func_addr:
                    break

    def _build_symbolizer_args(self, binary_path):
        """ Return the llvm-symbolizer command line used for binary_path. """
        args = [self.symbolizer_path, '-print-address', '-inlining', '-obj=%s' % binary_path]
        if self.with_function_name:
            args += ['-functions=linkage', '-demangle']
        else:
            args.append('-functions=none')
        return args

    def _parse_source_location(self, line):
        """ Parse a "filename:line:column" line.
            Return (file_path, line_number as int), or (None, None) if the line has a
            different format or contains '?' in the file or line field.
        """
        file_path, line_number = None, None
        # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25".
        # Filename may contain ':' like "C:\Users\...\file".
        items = line.rsplit(':', 2)
        if len(items) == 3:
            file_path, line_number = items[:2]
        if not file_path or ('?' in file_path) or not line_number or ('?' in line_number):
            return None, None
        try:
            line_number = int(line_number)
        except ValueError:
            return None, None
        return file_path, line_number

    def _get_file_id(self, file_path):
        """ Return the id of file_path, allocating a new id on first use. """
        file_id = self.file_name_to_id.get(file_path)
        if file_id is None:
            file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name)
            self.file_id_to_name.append(file_path)
        return file_id

    def _get_func_id(self, func_name):
        """ Return the id of func_name, allocating a new id on first use. """
        func_id = self.func_name_to_id.get(func_name)
        if func_id is None:
            func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name)
            self.func_id_to_name.append(func_name)
        return func_id

    def get_dso(self, dso_path):
        """ Return the Dso registered for dso_path, or None. """
        return self.dso_map.get(dso_path)

    def get_addr_source(self, dso, addr):
        """ Return the line info of a registered addr in dso, or None if it has none.
            The result is a list of (file_name, line), or (file_name, line, function_name)
            when function names are collected.
        """
        source = dso.addrs[addr].source_lines
        if source is None:
            return None
        if self.with_function_name:
            return [(self.file_id_to_name[file_id], line, self.func_id_to_name[func_id])
                    for (file_id, line, func_id) in source]
        return [(self.file_id_to_name[file_id], line) for (file_id, line) in source]


class SourceFileSearcher(object):
    """ Find source file paths in the file system.
        The file paths reported by llvm-symbolizer are the paths stored in debug sections
        of shared libraries. And we need to convert them to file paths in the file
        system. It is done in below steps:
        1. Collect all file paths under the provided source_dirs. The suffix of a
           source file should contain one of below:
            h: for C/C++ header files.
            c: for C/C++ source files.
            java: for Java source files.
            kt: for Kotlin source files.
        2. Given an abstract_path reported by llvm-symbolizer, select the best real path
           as below:
           2.1 Find all real paths with the same file name as the abstract path.
           2.2 Select the real path having the longest common suffix with the abstract path.
    """

    SOURCE_FILE_EXTS = {'.h', '.hh', '.H', '.hxx', '.hpp', '.h++',
                        '.c', '.cc', '.C', '.cxx', '.cpp', '.c++',
                        '.java', '.kt'}

    @classmethod
    def is_source_filename(cls, filename):
        """ Return True if the extension of filename is one of SOURCE_FILE_EXTS. """
        ext = os.path.splitext(filename)[1]
        return ext in cls.SOURCE_FILE_EXTS

    def __init__(self, source_dirs):
        """ source_dirs: directories to search recursively for source files. """
        # Map from filename to a list of reversed directory path containing filename.
        self.filename_to_rparents = {}
        self._collect_paths(source_dirs)

    def _collect_paths(self, source_dirs):
        """ Record the reversed parent directory of every source file under source_dirs. """
        for source_dir in source_dirs:
            for parent, _, file_names in os.walk(source_dir):
                rparent = None
                for file_name in file_names:
                    if not self.is_source_filename(file_name):
                        continue
                    if rparent is None:
                        # Reverse the directory once, and share it between its files.
                        rparent = parent[::-1]
                    self.filename_to_rparents.setdefault(file_name, []).append(rparent)

    def get_real_path(self, abstract_path):
        """ Return the collected path best matching abstract_path, or None if no
            collected file has the same file name.
        """
        abstract_path = abstract_path.replace('/', os.sep)
        abstract_parent, file_name = os.path.split(abstract_path)
        abstract_rparent = abstract_parent[::-1]
        real_rparents = self.filename_to_rparents.get(file_name)
        if real_rparents is None:
            return None
        best_matched_rparent = None
        best_common_length = -1
        for real_rparent in real_rparents:
            # A common prefix of reversed paths is a common suffix of the paths.
            length = len(os.path.commonprefix((real_rparent, abstract_rparent)))
            if length > best_common_length:
                best_common_length = length
                best_matched_rparent = real_rparent
        if best_matched_rparent is None:
            return None
        return os.path.join(best_matched_rparent[::-1], file_name)


class Objdump(object):
    """ A wrapper of objdump to disassemble code. """
    def __init__(self, ndk_path, binary_cache_path):
        """ ndk_path: optional ndk directory used to find objdump and readelf.
            binary_cache_path: directory holding copies of the recorded binaries.
        """
        self.ndk_path = ndk_path
        self.binary_cache_path = binary_cache_path
        self.readelf = ReadElf(ndk_path)
        self.objdump_paths = {}  # map from arch to objdump path.

    def get_dso_info(self, dso_path):
        """ Return (real_path, arch) of a recorded dso, or None if the file can't be
            found or its arch is unknown.
        """
        real_path = find_real_dso_path(dso_path, self.binary_cache_path)
        if not real_path:
            return None
        arch = self.readelf.get_arch(real_path)
        if arch == 'unknown':
            return None
        return (real_path, arch)

    def disassemble_code(self, dso_info, start_addr, addr_len):
        """ Disassemble [start_addr, start_addr + addr_len] of dso_path.
            dso_info: a (real_path, arch) pair as returned by get_dso_info().
            Return a list of pair (disassemble_code_line, addr), or None if objdump can't
            be run or prints nothing. addr is 0 for lines not starting with a hex address.
            Exits the program if objdump can't be found.
        """
        real_path, arch = dso_info
        # Find objdump for the arch, caching the result.
        objdump_path = self.objdump_paths.get(arch)
        if not objdump_path:
            objdump_path = find_tool_path('objdump', self.ndk_path, arch)
            if not objdump_path:
                log_exit("Can't find objdump. Please set ndk path with --ndk_path option.")
            self.objdump_paths[arch] = objdump_path

        # Run objdump.
        args = [objdump_path, '-dlC', '--no-show-raw-insn',
                '--start-address=0x%x' % start_addr,
                '--stop-address=0x%x' % (start_addr + addr_len),
                real_path]
        try:
            subproc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            (stdoutdata, _) = subproc.communicate()
            stdoutdata = bytes_to_str(stdoutdata)
        except OSError:
            return None

        if not stdoutdata:
            return None
        result = []
        for line in stdoutdata.split('\n'):
            line = line.rstrip()  # Remove '\r' on Windows.
            items = line.split(':', 1)
            try:
                addr = int(items[0], 16)
            except ValueError:
                addr = 0
            result.append((line, addr))
        return result


class ReadElf(object):
    """ A wrapper of readelf. """
    def __init__(self, ndk_path):
        """ ndk_path: optional ndk directory used to find readelf.
            Exits the program if readelf can't be found.
        """
        self.readelf_path = find_tool_path('readelf', ndk_path)
        if not self.readelf_path:
            log_exit("Can't find readelf. Please set ndk path with --ndk_path option.")

    def _run_readelf(self, option, elf_file_path):
        """ Return the output of `readelf <option> <elf_file_path>` as str, or None if
            the path isn't an elf file or readelf exits with an error.
        """
        if not is_elf_file(elf_file_path):
            return None
        try:
            output = subprocess.check_output([self.readelf_path, option, elf_file_path])
        except subprocess.CalledProcessError:
            return None
        return bytes_to_str(output)

    def get_arch(self, elf_file_path):
        """ Get arch of an elf file: 'arm64', 'arm', 'x86_64', 'x86' or 'unknown'. """
        output = self._run_readelf('-h', elf_file_path)
        if output is not None:
            if output.find('AArch64') != -1:
                return 'arm64'
            if output.find('ARM') != -1:
                return 'arm'
            if output.find('X86-64') != -1:
                return 'x86_64'
            if output.find('80386') != -1:
                return 'x86'
        return 'unknown'

    def get_build_id(self, elf_file_path, with_padding=True):
        """ Get build id of an elf file.
            with_padding: format the build id with pad_build_id().
            Return "" if the build id can't be read.
        """
        output = self._run_readelf('-n', elf_file_path)
        if output is not None:
            result = re.search(r'Build ID:\s*(\S+)', output)
            if result:
                build_id = result.group(1)
                if with_padding:
                    build_id = self.pad_build_id(build_id)
                return build_id
        return ""

    @staticmethod
    def pad_build_id(build_id):
        """ Pad build id to 40 hex numbers (20 bytes).
            Longer build ids are truncated. The result is prefixed with '0x'.
        """
        if len(build_id) < 40:
            build_id += '0' * (40 - len(build_id))
        else:
            build_id = build_id[:40]
        return '0x' + build_id

    def get_sections(self, elf_file_path):
        """ Get sections of an elf file.
            Return a list of section names, which is empty if they can't be read.
        """
        section_names = []
        output = self._run_readelf('-SW', elf_file_path)
        if output is not None:
            for line in output.split('\n'):
                # Parse line like:" [ 1] .note.android.ident NOTE  0000000000400190 ...".
                result = re.search(r'^\s+\[\s*\d+\]\s(.+?)\s', line)
                if result:
                    section_name = result.group(1).strip()
                    if section_name:
                        section_names.append(section_name)
        return section_names


def extant_dir(arg):
    """ArgumentParser type that only accepts extant directories.

    Args:
        arg: The string argument given on the command line.
    Returns: The argument as a realpath.
    Raises:
        argparse.ArgumentTypeError: The given path isn't a directory.
    """
    path = os.path.realpath(arg)
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError('{} is not a directory.'.format(path))
    return path


def extant_file(arg):
    """ArgumentParser type that only accepts extant files.

    Args:
        arg: The string argument given on the command line.
    Returns: The argument as a realpath.
    Raises:
        argparse.ArgumentTypeError: The given path isn't a file.
    """
    path = os.path.realpath(arg)
    if not os.path.isfile(path):
        raise argparse.ArgumentTypeError('{} is not a file.'.format(path))
    return path


# Log everything by default. Scripts can change it with set_log_level().
logging.getLogger().setLevel(logging.DEBUG)