#!/usr/bin/env python
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""annotate.py: annotate source files based on perf.data.
"""


import argparse
import os
import os.path
import shutil

from simpleperf_report_lib import ReportLib
from utils import log_info, log_warning, log_exit
from utils import Addr2Nearestline, extant_dir, flatten_arg_list, is_windows, SourceFileSearcher


class SourceLine(object):
    """A (file, function, line) triple describing one source location."""

    def __init__(self, file_id, function, line):
        self.file = file_id
        self.function = function
        self.line = line

    @property
    def file_key(self):
        """Key identifying the source file."""
        return self.file

    @property
    def function_key(self):
        """Key identifying a function within its source file."""
        return (self.file, self.function)

    @property
    def line_key(self):
        """Key identifying a line within its source file."""
        return (self.file, self.line)


class Addr2Line(object):
    """Collect information of how to map [dso_name, vaddr] to [source_file:line].

    Thin wrapper that combines Addr2Nearestline (address -> source location)
    with SourceFileSearcher (reported source path -> path on this machine).
    """

    def __init__(self, ndk_path, binary_cache_path, source_dirs):
        self.addr2line = Addr2Nearestline(ndk_path, binary_cache_path, True)
        self.source_searcher = SourceFileSearcher(source_dirs)

    def add_addr(self, dso_path, func_addr, addr):
        """Register an address that should be converted later."""
        self.addr2line.add_addr(dso_path, func_addr, addr)

    def convert_addrs_to_lines(self):
        """Convert all registered addresses to source locations."""
        self.addr2line.convert_addrs_to_lines()

    def get_sources(self, dso_path, addr):
        """Return a list of SourceLine objects for addr in dso_path.

        Returns an empty list when the dso or the address is unknown. When
        the source searcher can't resolve the reported file, the reported
        path is used unchanged.
        """
        dso = self.addr2line.get_dso(dso_path)
        if not dso:
            return []
        source = self.addr2line.get_addr_source(dso, addr)
        if not source:
            return []
        result = []
        for (source_file, source_line, function_name) in source:
            source_file_path = self.source_searcher.get_real_path(source_file)
            if not source_file_path:
                source_file_path = source_file
            result.append(SourceLine(source_file_path, function_name, source_line))
        return result


class Period(object):
    """event count information. It can be used to represent event count
       of a line, a function, a source file, or a binary. It contains two
       parts: period and acc_period.
       When used for a line, period is the event count occurred when running
       that line, acc_period is the accumulated event count occurred when
       running that line and functions called by that line. Same thing applies
       when it is used for a function, a source file, or a binary.
    """

    def __init__(self, period=0, acc_period=0):
        self.period = period
        self.acc_period = acc_period

    def __iadd__(self, other):
        self.period += other.period
        self.acc_period += other.acc_period
        return self


class DsoPeriod(object):
    """Period for each shared library"""

    def __init__(self, dso_name):
        self.dso_name = dso_name
        self.period = Period()

    def add_period(self, period):
        """Accumulate period into this library's total."""
        self.period += period


class FilePeriod(object):
    """Period for each source file"""

    def __init__(self, file_id):
        self.file = file_id
        self.period = Period()
        # Period for each line in the file.
        self.line_dict = {}
        # Period for each function in the source file.
        # Maps function name -> [function_start_line, Period].
        self.function_dict = {}

    def add_period(self, period):
        """Accumulate period into this file's total."""
        self.period += period

    def add_line_period(self, line, period):
        """Accumulate period into the entry for line, creating it if needed."""
        line_period = self.line_dict.get(line)
        if line_period is None:
            self.line_dict[line] = line_period = Period()
        line_period += period

    def add_function_period(self, function_name, function_start_line, period):
        """Accumulate period into the entry for function_name.

        The start line is recorded when the function is first seen; an
        unknown start line (None) is stored as -1.
        """
        entry = self.function_dict.get(function_name)
        if entry is None:
            if function_start_line is None:
                function_start_line = -1
            self.function_dict[function_name] = entry = [function_start_line, Period()]
        entry[1] += period


class SourceFileAnnotator(object):
    """group code for annotating source files"""

    def __init__(self, config):
        """Validate config and prepare the output directory.

        config is a dict that must contain the keys 'perf_data_list',
        'source_dirs', 'comm_filters', 'pid_filters', 'tid_filters',
        'dso_filters' and 'ndk_path'. The key 'annotate_dest_dir' is set on
        the passed dict. Any existing 'annotated_files' directory is removed
        and recreated.
        """
        # check config variables
        config_names = ['perf_data_list', 'source_dirs', 'comm_filters',
                        'pid_filters', 'tid_filters', 'dso_filters', 'ndk_path']
        for name in config_names:
            if name not in config:
                log_exit('config [%s] is missing' % name)
        symfs_dir = 'binary_cache'
        if not os.path.isdir(symfs_dir):
            symfs_dir = None
        kallsyms = 'binary_cache/kallsyms'
        if not os.path.isfile(kallsyms):
            kallsyms = None

        # init member variables
        self.config = config
        self.symfs_dir = symfs_dir
        self.kallsyms = kallsyms
        # An empty or missing filter list means "no filtering" (None).
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

        config['annotate_dest_dir'] = 'annotated_files'
        output_dir = config['annotate_dest_dir']
        # Start from a clean output directory on every run.
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        self.addr2line = Addr2Line(self.config['ndk_path'], symfs_dir, config.get('source_dirs'))
        # Total event count of all samples that contributed to the result.
        self.period = 0
        # dso_name -> DsoPeriod
        self.dso_periods = {}
        # source file path -> FilePeriod
        self.file_periods = {}

    def annotate(self):
        """Run the whole pipeline: collect, convert, accumulate, write."""
        self._collect_addrs()
        self._convert_addrs_to_lines()
        self._generate_periods()
        self._write_summary()
        self._annotate_files()

    def _for_each_sample(self, callback):
        """Call callback(lib, sample) for every accepted sample.

        Each file in config['perf_data_list'] is read with its own ReportLib
        instance. Samples rejected by _filter_sample are skipped. The
        instance is closed even when reading or the callback raises.
        """
        for perf_data in self.config['perf_data_list']:
            lib = ReportLib()
            try:
                lib.SetRecordFile(perf_data)
                if self.symfs_dir:
                    lib.SetSymfs(self.symfs_dir)
                if self.kallsyms:
                    lib.SetKallsymsFile(self.kallsyms)
                while True:
                    sample = lib.GetNextSample()
                    if sample is None:
                        break
                    if self._filter_sample(sample):
                        callback(lib, sample)
            finally:
                lib.Close()

    @staticmethod
    def _get_sample_symbols(lib):
        """Return the current sample's symbol followed by its callchain symbols."""
        symbols = [lib.GetSymbolOfCurrentSample()]
        callchain = lib.GetCallChainOfCurrentSample()
        # Index explicitly: only the first callchain.nr entries are read.
        for i in range(callchain.nr):
            symbols.append(callchain.entries[i].symbol)
        return symbols

    def _collect_addrs(self):
        """Read perf.data, collect all addresses we need to convert to
           source file:line.
        """
        self._for_each_sample(self._collect_addrs_for_sample)

    def _collect_addrs_for_sample(self, lib, sample):
        """Register the addresses of one sample (sample itself is not read)."""
        for symbol in self._get_sample_symbols(lib):
            if self._filter_symbol(symbol):
                # The hit address, used for file/line periods.
                self.addr2line.add_addr(symbol.dso_name, symbol.symbol_addr,
                                        symbol.vaddr_in_file)
                # The function start address, used for function periods.
                self.addr2line.add_addr(symbol.dso_name, symbol.symbol_addr,
                                        symbol.symbol_addr)

    def _filter_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter:
            if sample.thread_comm not in self.comm_filter:
                return False
        if self.pid_filter:
            if sample.pid not in self.pid_filter:
                return False
        if self.tid_filter:
            if sample.tid not in self.tid_filter:
                return False
        return True

    def _filter_symbol(self, symbol):
        """Return true if the symbol's dso passes the dso filter (if any)."""
        if not self.dso_filter or symbol.dso_name in self.dso_filter:
            return True
        return False

    def _convert_addrs_to_lines(self):
        self.addr2line.convert_addrs_to_lines()

    def _generate_periods(self):
        """read perf.data, collect Period for all types:
            binaries, source files, functions, lines.
        """
        self._for_each_sample(self._generate_periods_for_sample)

    def _generate_periods_for_sample(self, lib, sample):
        """Add the event count of one sample to every entity it touches."""
        symbols = self._get_sample_symbols(lib)
        # Each sample has a callchain, but its period is only used once
        # to add period for each function/source_line/source_file/binary.
        # For example, if more than one entry in the callchain hits a
        # function, the event count of that function is only increased once.
        # Otherwise, we may get periods > 100%.
        is_sample_used = False
        used_dso_dict = {}
        used_file_dict = {}
        used_function_dict = {}
        used_line_dict = {}
        # The first symbol is where the sample hit, so it gets both period
        # and acc_period; callchain entries only get acc_period.
        period = Period(sample.period, sample.period)
        for j, symbol in enumerate(symbols):
            if j == 1:
                period = Period(0, sample.period)
            if not self._filter_symbol(symbol):
                continue
            is_sample_used = True
            # Add period to dso.
            self._add_dso_period(symbol.dso_name, period, used_dso_dict)
            # Add period to source file.
            sources = self.addr2line.get_sources(symbol.dso_name, symbol.vaddr_in_file)
            for source in sources:
                if source.file:
                    self._add_file_period(source, period, used_file_dict)
                    # Add period to line.
                    if source.line:
                        self._add_line_period(source, period, used_line_dict)
            # Add period to function.
            sources = self.addr2line.get_sources(symbol.dso_name, symbol.symbol_addr)
            for source in sources:
                if source.file:
                    self._add_file_period(source, period, used_file_dict)
                    if source.function:
                        self._add_function_period(source, period, used_function_dict)

        if is_sample_used:
            self.period += sample.period

    def _add_dso_period(self, dso_name, period, used_dso_dict):
        """Add period to dso_name unless this sample already counted it."""
        if dso_name not in used_dso_dict:
            used_dso_dict[dso_name] = True
            dso_period = self.dso_periods.get(dso_name)
            if dso_period is None:
                dso_period = self.dso_periods[dso_name] = DsoPeriod(dso_name)
            dso_period.add_period(period)

    def _add_file_period(self, source, period, used_file_dict):
        """Add period to source's file unless this sample already counted it."""
        if source.file_key not in used_file_dict:
            used_file_dict[source.file_key] = True
            file_period = self.file_periods.get(source.file)
            if file_period is None:
                file_period = self.file_periods[source.file] = FilePeriod(source.file)
            file_period.add_period(period)

    def _add_line_period(self, source, period, used_line_dict):
        """Add period to source's line unless this sample already counted it.

        The FilePeriod for source.file must already exist; callers create it
        through _add_file_period first.
        """
        if source.line_key not in used_line_dict:
            used_line_dict[source.line_key] = True
            file_period = self.file_periods[source.file]
            file_period.add_line_period(source.line, period)

    def _add_function_period(self, source, period, used_function_dict):
        """Add period to source's function unless this sample already counted it.

        The FilePeriod for source.file must already exist; callers create it
        through _add_file_period first.
        """
        if source.function_key not in used_function_dict:
            used_function_dict[source.function_key] = True
            file_period = self.file_periods[source.file]
            file_period.add_function_period(source.function, source.line, period)

    def _write_summary(self):
        """Write the 'summary' file into the output directory.

        It lists the total period, then per-dso and per-file percentages
        (sorted by accumulated period, descending), then for each file its
        functions (same order) and its lines (by line number).
        """
        summary = os.path.join(self.config['annotate_dest_dir'], 'summary')
        with open(summary, 'w') as f:
            f.write('total period: %d\n\n' % self.period)
            dso_periods = sorted(self.dso_periods.values(),
                                 key=lambda x: x.period.acc_period, reverse=True)
            for dso_period in dso_periods:
                f.write('dso %s: %s\n' % (dso_period.dso_name,
                                          self._get_percentage_str(dso_period.period)))
            f.write('\n')

            file_periods = sorted(self.file_periods.values(),
                                  key=lambda x: x.period.acc_period, reverse=True)
            for file_period in file_periods:
                f.write('file %s: %s\n' % (file_period.file,
                                           self._get_percentage_str(file_period.period)))
            for file_period in file_periods:
                f.write('\n\n%s: %s\n' % (file_period.file,
                                          self._get_percentage_str(file_period.period)))
                values = []
                for func_name, (func_start_line, period) in file_period.function_dict.items():
                    values.append((func_name, func_start_line, period))
                values.sort(key=lambda x: x[2].acc_period, reverse=True)
                for value in values:
                    f.write('\tfunction (%s): line %d, %s\n' % (
                        value[0], value[1], self._get_percentage_str(value[2])))
                f.write('\n')
                for line in sorted(file_period.line_dict):
                    f.write('\tline %d: %s\n' % (
                        line, self._get_percentage_str(file_period.line_dict[line])))

    def _get_percentage_str(self, period, short=False):
        """Format period as percentages of the total; short uses brief labels."""
        s = 'acc_p: %f%%, p: %f%%' if short else 'accumulated_period: %f%%, period: %f%%'
        return s % self._get_percentage(period)

    def _get_percentage(self, period):
        """Return (acc_period %, period %) relative to the total period.

        Returns (0, 0) when no sample was counted, avoiding division by zero.
        """
        if self.period == 0:
            return (0, 0)
        acc_p = 100.0 * period.acc_period / self.period
        p = 100.0 * period.period / self.period
        return (acc_p, p)

    def _annotate_files(self):
        """Annotate Source files: add acc_period/period for each source file.
           1. Annotate java source files, which have $JAVA_SRC_ROOT prefix.
           2. Annotate c++ source files.
        """
        dest_dir = self.config['annotate_dest_dir']
        for from_path, file_period in self.file_periods.items():
            if not os.path.isfile(from_path):
                log_warning("can't find source file for path %s" % from_path)
                continue
            # Turn the source path into a relative path below dest_dir.
            if from_path.startswith('/'):
                to_path = os.path.join(dest_dir, from_path[1:])
            elif is_windows() and ':\\' in from_path:
                to_path = os.path.join(dest_dir, from_path.replace(':\\', os.sep))
            else:
                to_path = os.path.join(dest_dir, from_path)
            is_java = from_path.endswith('.java')
            self._annotate_file(from_path, to_path, file_period, is_java)

    def _annotate_file(self, from_path, to_path, file_period, is_java):
        """Annotate a source file.

        Annotate a source file in three steps:
          1. In the first line, show periods of this file.
          2. For each function, show periods of this function.
          3. For each line not hitting the same line as functions, show
             line periods.
        """
        log_info('annotate file %s' % from_path)
        with open(from_path, 'r') as rf:
            lines = rf.readlines()

        # Maps 1-based line number -> annotation text. Later assignments
        # override earlier ones: function beats line, file beats both.
        annotates = {}
        for line, line_period in file_period.line_dict.items():
            annotates[line] = self._get_percentage_str(line_period, True)
        for func_start_line, period in file_period.function_dict.values():
            if func_start_line == -1:
                continue
            # NOTE(review): for java files the annotation goes one line above
            # the recorded start line; presumably the recorded line is the
            # first statement rather than the declaration - confirm.
            line = func_start_line - 1 if is_java else func_start_line
            annotates[line] = '[func] ' + self._get_percentage_str(period, True)
        annotates[1] = '[file] ' + self._get_percentage_str(file_period.period, True)

        # annotates always contains key 1, so max() never sees an empty input.
        max_annotate_cols = max(len(text) for text in annotates.values())

        # Same width as an annotation wrapped in '/* ' and ' */'.
        empty_annotate = ' ' * (max_annotate_cols + 6)

        dirname = os.path.dirname(to_path)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        with open(to_path, 'w') as wf:
            for line_number, content in enumerate(lines, 1):
                annotate = annotates.get(line_number)
                if annotate is None:
                    # Don't add trailing-space padding to blank lines.
                    if not content.strip():
                        annotate = ''
                    else:
                        annotate = empty_annotate
                else:
                    annotate = '/* ' + annotate + (
                        ' ' * (max_annotate_cols - len(annotate))) + ' */'
                wf.write(annotate)
                wf.write(content)


def main():
    """Parse command line arguments and annotate source files."""
    parser = argparse.ArgumentParser(description="""
        Annotate source files based on profiling data. It reads line information from binary_cache
        generated by app_profiler.py or binary_cache_builder.py, and generate annotated source
        files in annotated_files directory.""")
    parser.add_argument('-i', '--perf_data_list', nargs='+', action='append', help="""
        The paths of profiling data. Default is perf.data.""")
    parser.add_argument('-s', '--source_dirs', type=extant_dir, nargs='+', action='append', help="""
        Directories to find source files.""")
    parser.add_argument('--comm', nargs='+', action='append', help="""
        Use samples only in threads with selected names.""")
    parser.add_argument('--pid', nargs='+', action='append', help="""
        Use samples only in processes with selected process ids.""")
    parser.add_argument('--tid', nargs='+', action='append', help="""
        Use samples only in threads with selected thread ids.""")
    parser.add_argument('--dso', nargs='+', action='append', help="""
        Use samples only in selected binaries.""")
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')

    args = parser.parse_args()
    config = {}
    config['perf_data_list'] = flatten_arg_list(args.perf_data_list)
    if not config['perf_data_list']:
        config['perf_data_list'].append('perf.data')
    config['source_dirs'] = flatten_arg_list(args.source_dirs)
    config['comm_filters'] = flatten_arg_list(args.comm)
    config['pid_filters'] = flatten_arg_list(args.pid)
    config['tid_filters'] = flatten_arg_list(args.tid)
    config['dso_filters'] = flatten_arg_list(args.dso)
    config['ndk_path'] = args.ndk_path

    annotator = SourceFileAnnotator(config)
    annotator.annotate()
    log_info('annotate finish successfully, please check result in annotated_files/.')


if __name__ == '__main__':
    main()