1# 2# Copyright (C) 2013 The Android Open Source Project 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15# 16 17"""Histogram generation tools.""" 18 19from __future__ import absolute_import 20from __future__ import division 21 22from collections import defaultdict 23 24from update_payload import format_utils 25 26 27class Histogram(object): 28 """A histogram generating object. 29 30 This object serves the sole purpose of formatting (key, val) pairs as an 31 ASCII histogram, including bars and percentage markers, and taking care of 32 label alignment, scaling, etc. In addition to the standard __init__ 33 interface, two static methods are provided for conveniently converting data 34 in different formats into a histogram. Histogram generation is exported via 35 its __str__ method, and looks as follows: 36 37 Yes |################ | 5 (83.3%) 38 No |### | 1 (16.6%) 39 40 TODO(garnold) we may want to add actual methods for adding data or tweaking 41 the output layout and formatting. For now, though, this is fine. 42 43 """ 44 45 def __init__(self, data, scale=20, formatter=None): 46 """Initialize a histogram object. 47 48 Args: 49 data: list of (key, count) pairs constituting the histogram 50 scale: number of characters used to indicate 100% 51 formatter: function used for formatting raw histogram values 52 53 """ 54 self.data = data 55 self.scale = scale 56 self.formatter = formatter or str 57 self.max_key_len = max([len(str(key)) for key, count in self.data]) 58 self.total = sum([count for key, count in self.data]) 59 60 @staticmethod 61 def FromCountDict(count_dict, scale=20, formatter=None, key_names=None): 62 """Takes a dictionary of counts and returns a histogram object. 63 64 This simply converts a mapping from names to counts into a list of (key, 65 count) pairs, optionally translating keys into name strings, then 66 generating and returning a histogram for them. This is a useful convenience 67 call for clients that update a dictionary of counters as they (say) scan a 68 data stream. 69 70 Args: 71 count_dict: dictionary mapping keys to occurrence counts 72 scale: number of characters used to indicate 100% 73 formatter: function used for formatting raw histogram values 74 key_names: dictionary mapping keys to name strings 75 Returns: 76 A histogram object based on the given data. 77 78 """ 79 namer = None 80 if key_names: 81 namer = lambda key: key_names[key] 82 else: 83 namer = lambda key: key 84 85 hist = [(namer(key), count) for key, count in count_dict.items()] 86 return Histogram(hist, scale, formatter) 87 88 @staticmethod 89 def FromKeyList(key_list, scale=20, formatter=None, key_names=None): 90 """Takes a list of (possibly recurring) keys and returns a histogram object. 91 92 This converts the list into a dictionary of counters, then uses 93 FromCountDict() to generate the actual histogram. For example: 94 95 ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ... 96 97 Args: 98 key_list: list of (possibly recurring) keys 99 scale: number of characters used to indicate 100% 100 formatter: function used for formatting raw histogram values 101 key_names: dictionary mapping keys to name strings 102 Returns: 103 A histogram object based on the given data. 104 105 """ 106 count_dict = defaultdict(int) # Unset items default to zero 107 for key in key_list: 108 count_dict[key] += 1 109 return Histogram.FromCountDict(count_dict, scale, formatter, key_names) 110 111 def __str__(self): 112 hist_lines = [] 113 hist_bar = '|' 114 for key, count in self.data: 115 if self.total: 116 bar_len = count * self.scale // self.total 117 hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale) 118 119 line = '%s %s %s' % ( 120 str(key).ljust(self.max_key_len), 121 hist_bar, 122 self.formatter(count)) 123 percent_str = format_utils.NumToPercent(count, self.total) 124 if percent_str: 125 line += ' (%s)' % percent_str 126 hist_lines.append(line) 127 128 return '\n'.join(hist_lines) 129 130 def GetKeys(self): 131 """Returns the keys of the histogram.""" 132 return [key for key, _ in self.data] 133