1from parser.aggregate import aggregate_times, nan_to_zero, LAYER_TOTAL
2from parser.naming import layers, names, phases, make_tag, subphases
3from parser.naming import LAYER_APPLICATION, LAYER_CPU
4from parser.naming import (PHASE_INITIALIZATION, PHASE_PREPARATION, PHASE_COMPILATION,
5                           PHASE_INPUTS_AND_OUTPUTS, PHASE_EXECUTION, PHASE_RESULTS,
6                           PHASE_TERMINATION, PHASE_OVERALL, PHASE_WARMUP,
7                           PHASE_BENCHMARK)
8import json
9import math
10import sys
11
def print_stats(tracker_map, print_detail=True, total_times=False, per_execution=False,
                json_output=False, starting_mark='', sep=''):
  """ Prints statistics for a single Overall phase as text or json.

      For text output:
        By default prints the self-time for each layer, prints total times instead if
        given total_times=True.

        By default prints stats for all phases, prints only the Execution and its
        subphases (as per-execution times) if per_execution=True.

        If per_execution=True and the trace contains separate Warmup and Benchmark
        phases, prints only the Benchmark phase.

      For json output:
        The json output is internal to NNAPI and is not guaranteed stable or
        extensively defined. It does however contain a version field so that
        backwards-compatible tools can be created on top of it.

        The json output includes both the statistics themselves produced by
        aggregate_times as well as the values used to create the text output
        so that those are easily available. NaN statistics are emitted as
        null, since NaN is not valid JSON.

        Look at the end of the function for the fields included in the json.

      Arguments:
        tracker_map: mapping (iterated as pid -> tracker) whose values provide
                     is_complete(), print_stats() and print().
        print_detail: text output only; also call print_stats() and print() on
                      every tracker after the summary table.
        total_times: use total times instead of self-times.
        per_execution: restrict to Execution and divide by the execution count.
        json_output: print json instead of the text table.
        starting_mark: printed on its own line before the text table, or stored
                       in the "starting_mark" json field.
        sep: if non-empty, printed before anything else.

      Returns None. If any tracker is incomplete, a message is written to
      stderr and nothing else is printed.
  """
  PHASE_EXECUTION_LESS_IO_AND_RESULTS = "PEO"

  # Validate first so an incomplete trace never reaches the statistics code.
  for tracker in tracker_map.values():
    if not tracker.is_complete():
      sys.stderr.write("Incomplete trace, not able to print all statistics\n")
      return
  if sep:
    print(sep)

  phases_to_pick = phases + [PHASE_INPUTS_AND_OUTPUTS, PHASE_RESULTS]
  all_layers = layers + [LAYER_TOTAL]

  # Select template and statistics to use
  times, self_times, has_warmup_and_benchmark, execution_counts = aggregate_times(tracker_map)
  template = TEMPLATE_EXECUTION_ONLY if per_execution else TEMPLATE_ALL_PHASES
  if total_times:
    template = template.replace("self-times", "total time")
    times_to_use = times
  else:
    times_to_use = self_times
  if has_warmup_and_benchmark and per_execution:
    template = template.replace("Execution", "Benchmark")
    # NOTE(review): this overwrites the Execution entries of the aggregated
    # statistics in place, so the json "times"/"self_times" fields reflect the
    # Benchmark numbers as well -- presumably intended; confirm.
    for phase in [PHASE_EXECUTION] + subphases[PHASE_EXECUTION]:
      for layer in all_layers:
        times_to_use[phase][layer] = times_to_use[PHASE_BENCHMARK][phase][layer]

  # Rewrite template shorthand (":fl" must go first, ":f" is a prefix of it)
  template = template.replace(":fl", ":>11.2f")
  template = template.replace(":f", ":>9.2f")

  # Gather template inputs from statistics
  values = dict()
  has_cpu = False
  for layer in layers:
    for phase in phases_to_pick:
      t = times_to_use[phase][layer]
      values[make_tag(layer, phase)] = t
      if layer == LAYER_CPU and t > 0.0:
        has_cpu = True

  # Calculate layer totals and PHASE_EXECUTION_LESS_IO_AND_RESULTS
  for phase in phases_to_pick:
    values[make_tag(LAYER_TOTAL, phase)] = times_to_use[phase][LAYER_TOTAL]
  for layer in all_layers:
    values[make_tag(layer, PHASE_EXECUTION_LESS_IO_AND_RESULTS)] = (
        values[make_tag(layer, PHASE_EXECUTION)] -
        values[make_tag(layer, PHASE_INPUTS_AND_OUTPUTS)] -
        values[make_tag(layer, PHASE_RESULTS)])
    values[make_tag(layer, PHASE_OVERALL)] = times_to_use[PHASE_OVERALL][layer]

  # Calculate layer execution percentages
  execution_total = values[make_tag(LAYER_TOTAL, PHASE_EXECUTION)]
  for layer in layers:
    if execution_total > 0.0:
      values[make_tag(layer, "PEp")] = (values[make_tag(layer, PHASE_EXECUTION)] * 100.0 /
                                        execution_total)
    else:
      values[make_tag(layer, "PEp")] = math.nan

  # Make output numbers per-execution if desired
  if per_execution:
    count_phase = PHASE_BENCHMARK if has_warmup_and_benchmark else PHASE_OVERALL
    divide_by = execution_counts[count_phase]
    per_execution_phases = [PHASE_INPUTS_AND_OUTPUTS, PHASE_EXECUTION_LESS_IO_AND_RESULTS,
                            PHASE_RESULTS, PHASE_EXECUTION]
    for layer in all_layers:
      for phase in per_execution_phases:
        tag = make_tag(layer, phase)
        # No executions recorded: report n/a rather than dividing by zero.
        values[tag] = values[tag] / divide_by if divide_by != 0 else math.nan

  # Generate and print output
  if not json_output:
    # Apply template and prettify numbers
    output = template.format(**values)
    output = output.replace(" 0.00%", "     -")
    output = output.replace(" 0.00", "    -")
    output = output.replace(" nan", " n/a")

    # Print output
    print(starting_mark)
    for line in output.splitlines():
      # Hide the CPU row entirely when no CPU time was recorded.
      if line.startswith("CPU") and not has_cpu:
        continue
      print(line)
    if print_detail:
      for tracker in tracker_map.values():
        tracker.print_stats()
      for tracker in tracker_map.values():
        tracker.print()
  else:
    output = dict(times=times, self_times=self_times, execution_counts=execution_counts,
                  template_inputs=values, version=1, starting_mark=starting_mark)
    # JSON doesn't recognize NaN. Convert the values before serializing rather
    # than patching the serialized text, which would also rewrite "NaN" inside
    # string values such as starting_mark.
    print(json.dumps(_nan_to_none(output), indent=2, sort_keys=True))


def _nan_to_none(obj):
  """ Returns a copy of obj with every float NaN replaced by None.

      Recurses into dict values, lists and tuples (tuples come back as lists,
      which json serializes identically); anything else is returned unchanged.
  """
  if isinstance(obj, float):
    return None if math.isnan(obj) else obj
  if isinstance(obj, dict):
    return {key: _nan_to_none(value) for key, value in obj.items()}
  if isinstance(obj, (list, tuple)):
    return [_nan_to_none(item) for item in obj]
  return obj
137
def reset_trackers(tracker_map):
  """ Calls reset() on every tracker stored in tracker_map, in iteration order. """
  for tracker in tracker_map.values():
    tracker.reset()
142
# Text report covering every phase. Placeholders are named <layer>_<phase>
# (e.g. LA_PI) and filled in by print_stats via str.format. The ":f" and ":fl"
# format specs are shorthand that print_stats expands to ":>9.2f" and
# ":>11.2f" before formatting. print_stats also rewrites "self-times" when
# total times are requested and drops the line starting with "CPU" when no
# CPU time was recorded, so keep those literal strings intact.
TEMPLATE_ALL_PHASES = """
===========================================================================================================================================
NNAPI timing summary (self-times, ms wall-clock)                                                      Execution
                                                           ----------------------------------------------------
              Initialization   Preparation   Compilation           I/O       Compute      Results     Ex. total   Termination        Total
              --------------   -----------   -----------   -----------  ------------  -----------   -----------   -----------   ----------
Application        {LA_PI:f}     {LA_PP:f}     {LA_PC:f}   {LA_PIO:fl}   {LA_PEO:fl}    {LA_PR:f}     {LA_PE:f}     {LA_PT:f}    {LA_PO:f}*
Runtime            {LR_PI:f}     {LR_PP:f}     {LR_PC:f}   {LR_PIO:fl}   {LR_PEO:fl}    {LR_PR:f}     {LR_PE:f}     {LR_PT:f}    {LR_PO:f}
IPC                {LI_PI:f}     {LI_PP:f}     {LI_PC:f}   {LI_PIO:fl}   {LI_PEO:fl}    {LI_PR:f}     {LI_PE:f}     {LI_PT:f}    {LI_PO:f}
Driver             {LD_PI:f}     {LD_PP:f}     {LD_PC:f}   {LD_PIO:fl}   {LD_PEO:fl}    {LD_PR:f}     {LD_PE:f}     {LD_PT:f}    {LD_PO:f}
CPU                {LC_PI:f}     {LC_PP:f}     {LC_PC:f}   {LC_PIO:fl}   {LC_PEO:fl}    {LC_PR:f}     {LC_PE:f}     {LC_PT:f}    {LC_PO:f}

Total              {LT_PI:f}*    {LT_PP:f}*    {LT_PC:f}*  {LT_PIO:fl}*  {LT_PEO:fl}*   {LT_PR:f}*    {LT_PE:f}*    {LT_PT:f}*   {LT_PO:f}*
===========================================================================================================================================
* This total ignores missing (n/a) values and thus is not necessarily consistent with the rest of the numbers
"""
159
# Text report restricted to the Execution phase, used by print_stats when
# per_execution=True. Placeholders are named <layer>_<phase>; the "PEp" tags
# hold each layer's percentage of the total execution time. The ":f" and ":fl"
# format specs are shorthand that print_stats expands to ":>9.2f" and
# ":>11.2f". print_stats may also rewrite "self-times" and "Execution" in the
# heading and drops the line starting with "CPU" when no CPU time was
# recorded, so keep those literal strings intact.
TEMPLATE_EXECUTION_ONLY = """
================================================================================
NNAPI timing summary (self-times, ms wall-clock)                       Execution
              ------------------------------------------------------------------
                      I/O       Compute      Results         Total    Percentage
              -----------  ------------  -----------   -----------   -----------
Application   {LA_PIO:fl}   {LA_PEO:fl}    {LA_PR:f}     {LA_PE:f}  {LA_PEp:fl}%
Runtime       {LR_PIO:fl}   {LR_PEO:fl}    {LR_PR:f}     {LR_PE:f}  {LR_PEp:fl}%
IPC           {LI_PIO:fl}   {LI_PEO:fl}    {LI_PR:f}     {LI_PE:f}  {LI_PEp:fl}%
Driver        {LD_PIO:fl}   {LD_PEO:fl}    {LD_PR:f}     {LD_PE:f}  {LD_PEp:fl}%
CPU           {LC_PIO:fl}   {LC_PEO:fl}    {LC_PR:f}     {LC_PE:f}  {LC_PEp:fl}%

Total         {LT_PIO:fl}*  {LT_PEO:fl}*   {LT_PR:f}*    {LT_PE:f}          100%
================================================================================
* This total ignores missing (n/a) values and thus is not necessarily consistent
  with the rest of the numbers
"""
177