"""Text and JSON reporting of aggregated NNAPI timing statistics."""

from parser.aggregate import aggregate_times, nan_to_zero, LAYER_TOTAL
from parser.naming import layers, names, phases, make_tag, subphases
from parser.naming import LAYER_APPLICATION, LAYER_CPU
from parser.naming import (PHASE_INITIALIZATION, PHASE_PREPARATION, PHASE_COMPILATION,
                           PHASE_INPUTS_AND_OUTPUTS, PHASE_EXECUTION, PHASE_RESULTS,
                           PHASE_TERMINATION, PHASE_OVERALL, PHASE_WARMUP,
                           PHASE_BENCHMARK)
import json
import math
import sys


def _nan_to_none(obj):
    """Return a copy of obj in which every float NaN is replaced by None.

    Recurses through dicts, lists and tuples (tuples come back as lists, which
    is how json would serialize them anyway); any other value is returned as is.
    """
    if isinstance(obj, float):
        return None if math.isnan(obj) else obj
    if isinstance(obj, dict):
        return {key: _nan_to_none(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [_nan_to_none(item) for item in obj]
    return obj


def print_stats(tracker_map, print_detail=True, total_times=False, per_execution=False,
                json_output=False, starting_mark='', sep=''):
    """ Prints statistics for a single Overall phase as text or json.

    For text output:
      By default prints the self-time for each layer, prints total times instead if
      given total_times=True.

      By default prints stats for all phases, prints only the Execution and its
      subphases (as per-execution times) if per_execution=True.

      If per_execution=True and the trace contains separate Warmup and Benchmark
      phases, prints only the Benchmark phase.

    For json output:
      The json output is internal to NNAPI and is not guaranteed stable or
      extensively defined. It does however contain a version field so that
      backwards-compatible tools can be created on top of it.

      The json output includes both the statistics themselves produced by
      aggregate_times as well as the values used to create the text output
      so that those are easily available.

      Fields: times, self_times, execution_counts, template_inputs, version,
      starting_mark. NaN values are emitted as null.

    Args:
      tracker_map: mapping whose values are tracker objects; each must provide
        is_complete(), print_stats() and print().
      print_detail: text output only; when true, print_stats() and then print()
        are called on every tracker after the summary table.
      total_times: use total times instead of self-times.
      per_execution: restrict output to the Execution phase and divide its
        numbers by the execution count (n/a if that count is zero).
      json_output: emit json instead of the text table.
      starting_mark: printed on its own line before the text table, or stored
        in the "starting_mark" json field.
      sep: if non-empty, printed before any other output.

    Returns:
      None. If any tracker is incomplete, a message is written to stderr and
      nothing else is printed.
    """
    PHASE_EXECUTION_LESS_IO_AND_RESULTS = "PEO"
    phases_to_pick = phases + [PHASE_INPUTS_AND_OUTPUTS, PHASE_RESULTS]
    layers_and_total = layers + [LAYER_TOTAL]

    if not all(tracker.is_complete() for tracker in tracker_map.values()):
        sys.stderr.write("Incomplete trace, not able to print all statistics\n")
        return
    if sep:
        print(sep)

    # Select template and statistics to use
    times, self_times, has_warmup_and_benchmark, execution_counts = aggregate_times(tracker_map)
    template = TEMPLATE_EXECUTION_ONLY if per_execution else TEMPLATE_ALL_PHASES
    if total_times:
        template = template.replace("self-times", "total time")
        times_to_use = times
    else:
        times_to_use = self_times
    if has_warmup_and_benchmark and per_execution:
        template = template.replace("Execution", "Benchmark")
        # Overwrite the Execution numbers with the Benchmark-only ones. This
        # updates the aggregated statistics in place, so the json output
        # reflects the substitution as well.
        for phase in [PHASE_EXECUTION] + subphases[PHASE_EXECUTION]:
            for layer in layers_and_total:
                times_to_use[phase][layer] = times_to_use[PHASE_BENCHMARK][phase][layer]

    # Rewrite template shorthand (":fl" must go first, ":f" is a prefix of it)
    template = template.replace(":fl", ":>11.2f")
    template = template.replace(":f", ":>9.2f")

    # Gather template inputs from statistics
    values = dict()
    has_cpu = False
    for layer in layers:
        for phase in phases_to_pick:
            t = times_to_use[phase][layer]
            values[make_tag(layer, phase)] = t
            if layer == LAYER_CPU:
                has_cpu = (has_cpu or t > 0.0)

    # Calculate layer totals and PHASE_EXECUTION_LESS_IO_AND_RESULTS
    for phase in phases_to_pick:
        values[make_tag(LAYER_TOTAL, phase)] = times_to_use[phase][LAYER_TOTAL]
    for layer in layers_and_total:
        values[make_tag(layer, PHASE_EXECUTION_LESS_IO_AND_RESULTS)] = (
            values[make_tag(layer, PHASE_EXECUTION)] -
            values[make_tag(layer, PHASE_INPUTS_AND_OUTPUTS)] -
            values[make_tag(layer, PHASE_RESULTS)])
        values[make_tag(layer, PHASE_OVERALL)] = times_to_use[PHASE_OVERALL][layer]

    # Calculate layer execution percentages
    total_execution = values[make_tag(LAYER_TOTAL, PHASE_EXECUTION)]
    for layer in layers:
        if total_execution > 0.0:
            values[make_tag(layer, "PEp")] = (
                values[make_tag(layer, PHASE_EXECUTION)] * 100.0 / total_execution)
        else:
            values[make_tag(layer, "PEp")] = math.nan

    # Make output numbers per-execution if desired
    if per_execution:
        if has_warmup_and_benchmark:
            divide_by = execution_counts[PHASE_BENCHMARK]
        else:
            divide_by = execution_counts[PHASE_OVERALL]
        for layer in layers_and_total:
            for phase in [PHASE_INPUTS_AND_OUTPUTS, PHASE_EXECUTION_LESS_IO_AND_RESULTS,
                          PHASE_RESULTS, PHASE_EXECUTION]:
                # Same key scheme as everywhere else in this function.
                tag = make_tag(layer, phase)
                values[tag] = values[tag] / divide_by if divide_by != 0 else math.nan

    # Generate and print output
    if not json_output:
        # Apply template and prettify numbers
        output = template.format(**values)
        output = output.replace(" 0.00%", " -")
        output = output.replace(" 0.00", " -")
        output = output.replace(" nan", " n/a")

        # Print output, hiding the CPU row when no CPU time was recorded
        print(starting_mark)
        for line in output.splitlines():
            if line.startswith("CPU") and not has_cpu:
                continue
            print(line)
        if print_detail:
            # NOTE(review): nesting of the second loop under print_detail is
            # inferred; confirm against the canonical copy of this file.
            for tracker in tracker_map.values():
                tracker.print_stats()
            for tracker in tracker_map.values():
                tracker.print()
    else:
        output = dict(times=times, self_times=self_times, execution_counts=execution_counts,
                      template_inputs=values, version=1, starting_mark=starting_mark)
        # JSON doesn't recognize NaN. Convert the values themselves rather than
        # patching the serialized text, which would also rewrite any string
        # (e.g. starting_mark) that happens to contain "NaN".
        print(json.dumps(_nan_to_none(output), indent=2, sort_keys=True))


def reset_trackers(tracker_map):
    """Calls reset() on every tracker in tracker_map."""
    for tracker in tracker_map.values():
        tracker.reset()


# Templates for the text output. ":f" and ":fl" are shorthand that print_stats
# expands to ":>9.2f" and ":>11.2f" before formatting; the placeholders are the
# make_tag(layer, phase) keys filled in by print_stats.
# NOTE(review): column spacing inside the templates (and in the "0.00"/"nan"
# replacement strings in print_stats) is reproduced as it appears in the
# reviewed copy; confirm the alignment against rendered output.
TEMPLATE_ALL_PHASES = """
===========================================================================================================================================
NNAPI timing summary (self-times, ms wall-clock) Execution
 ----------------------------------------------------
 Initialization Preparation Compilation I/O Compute Results Ex. total Termination Total
 -------------- ----------- ----------- ----------- ------------ ----------- ----------- ----------- ----------
Application {LA_PI:f} {LA_PP:f} {LA_PC:f} {LA_PIO:fl} {LA_PEO:fl} {LA_PR:f} {LA_PE:f} {LA_PT:f} {LA_PO:f}*
Runtime {LR_PI:f} {LR_PP:f} {LR_PC:f} {LR_PIO:fl} {LR_PEO:fl} {LR_PR:f} {LR_PE:f} {LR_PT:f} {LR_PO:f}
IPC {LI_PI:f} {LI_PP:f} {LI_PC:f} {LI_PIO:fl} {LI_PEO:fl} {LI_PR:f} {LI_PE:f} {LI_PT:f} {LI_PO:f}
Driver {LD_PI:f} {LD_PP:f} {LD_PC:f} {LD_PIO:fl} {LD_PEO:fl} {LD_PR:f} {LD_PE:f} {LD_PT:f} {LD_PO:f}
CPU {LC_PI:f} {LC_PP:f} {LC_PC:f} {LC_PIO:fl} {LC_PEO:fl} {LC_PR:f} {LC_PE:f} {LC_PT:f} {LC_PO:f}

Total {LT_PI:f}* {LT_PP:f}* {LT_PC:f}* {LT_PIO:fl}* {LT_PEO:fl}* {LT_PR:f}* {LT_PE:f}* {LT_PT:f}* {LT_PO:f}*
===========================================================================================================================================
* This total ignores missing (n/a) values and thus is not necessarily consistent with the rest of the numbers
"""

TEMPLATE_EXECUTION_ONLY = """
================================================================================
NNAPI timing summary (self-times, ms wall-clock) Execution
 ------------------------------------------------------------------
 I/O Compute Results Total Percentage
 ----------- ------------ ----------- ----------- -----------
Application {LA_PIO:fl} {LA_PEO:fl} {LA_PR:f} {LA_PE:f} {LA_PEp:fl}%
Runtime {LR_PIO:fl} {LR_PEO:fl} {LR_PR:f} {LR_PE:f} {LR_PEp:fl}%
IPC {LI_PIO:fl} {LI_PEO:fl} {LI_PR:f} {LI_PE:f} {LI_PEp:fl}%
Driver {LD_PIO:fl} {LD_PEO:fl} {LD_PR:f} {LD_PE:f} {LD_PEp:fl}%
CPU {LC_PIO:fl} {LC_PEO:fl} {LC_PR:f} {LC_PE:f} {LC_PEp:fl}%

Total {LT_PIO:fl}* {LT_PEO:fl}* {LT_PR:f}* {LT_PE:f} 100%
================================================================================
* This total ignores missing (n/a) values and thus is not necessarily consistent
 with the rest of the numbers
"""