1#
2# Copyright (C) 2013 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16
17"""Histogram generation tools."""
18
19from __future__ import absolute_import
20from __future__ import division
21
22from collections import defaultdict
23
24from update_payload import format_utils
25
26
class Histogram(object):
  """A histogram generating object.

  This object serves the sole purpose of formatting (key, val) pairs as an
  ASCII histogram, including bars and percentage markers, and taking care of
  label alignment, scaling, etc. In addition to the standard __init__
  interface, two static methods are provided for conveniently converting data
  in different formats into a histogram. Histogram generation is exported via
  its __str__ method, and looks as follows:

    Yes |################    | 5 (83.3%)
    No  |###                 | 1 (16.6%)

  An empty histogram (no data pairs) is valid and renders as an empty string.

  TODO(garnold) we may want to add actual methods for adding data or tweaking
  the output layout and formatting. For now, though, this is fine.

  """

  def __init__(self, data, scale=20, formatter=None):
    """Initialize a histogram object.

    Args:
      data: list of (key, count) pairs constituting the histogram; it is
        iterated more than once, so it must be a re-iterable sequence rather
        than a one-shot iterator. May be empty.
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values; defaults
        to str when not given

    """
    self.data = data
    self.scale = scale
    self.formatter = formatter or str
    # Width of the label column. max() raises ValueError on an empty sequence,
    # so fall back to zero width when there is no data. ("or [0]" rather than
    # max(..., default=0) keeps this working on Python 2.)
    key_lens = [len(str(key)) for key, _ in self.data]
    self.max_key_len = max(key_lens or [0])
    self.total = sum([count for _, count in self.data])

  @staticmethod
  def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
    """Takes a dictionary of counts and returns a histogram object.

    This simply converts a mapping from names to counts into a list of (key,
    count) pairs, optionally translating keys into name strings, then
    generating and returning a histogram for them. This is a useful convenience
    call for clients that update a dictionary of counters as they (say) scan a
    data stream.

    Args:
      count_dict: dictionary mapping keys to occurrence counts
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings; when given (and
        non-empty), every key in count_dict is looked up in it
    Returns:
      A histogram object based on the given data.
    Raises:
      KeyError: if key_names is given but lacks an entry for some key.

    """
    if key_names:
      namer = lambda key: key_names[key]
    else:
      namer = lambda key: key

    hist = [(namer(key), count) for key, count in count_dict.items()]
    return Histogram(hist, scale, formatter)

  @staticmethod
  def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
    """Takes a list of (possibly recurring) keys and returns a histogram object.

    This converts the list into a dictionary of counters, then uses
    FromCountDict() to generate the actual histogram. For example:

      ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...

    Args:
      key_list: list of (possibly recurring) keys
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings
    Returns:
      A histogram object based on the given data.

    """
    count_dict = defaultdict(int)  # Unset items default to zero
    for key in key_list:
      count_dict[key] += 1
    return Histogram.FromCountDict(count_dict, scale, formatter, key_names)

  def __str__(self):
    """Renders the histogram as text, one line per (key, count) pair.

    Each line consists of the left-aligned key, a bar of '#' characters
    proportional to the count's share of the total (padded to the scale
    width), and the formatted count. A parenthesized percentage is appended
    whenever format_utils.NumToPercent() yields a non-empty string.

    Returns:
      The newline-joined histogram lines; an empty string if there is no data.

    """
    hist_lines = []
    for key, count in self.data:
      if self.total:
        # Integer division keeps the bar length a whole number of characters.
        bar_len = count * self.scale // self.total
        hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale)
      else:
        # No meaningful proportions when the total is zero; emit only the
        # opening delimiter instead of a scaled bar.
        hist_bar = '|'

      line = '%s %s %s' % (
          str(key).ljust(self.max_key_len),
          hist_bar,
          self.formatter(count))
      percent_str = format_utils.NumToPercent(count, self.total)
      if percent_str:
        line += ' (%s)' % percent_str
      hist_lines.append(line)

    return '\n'.join(hist_lines)

  def GetKeys(self):
    """Returns the keys of the histogram, in data order."""
    return [key for key, _ in self.data]
133