1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_LIBARTBASE_BASE_HISTOGRAM_INL_H_
18 #define ART_LIBARTBASE_BASE_HISTOGRAM_INL_H_
19 
20 #include <algorithm>
21 #include <cmath>
22 #include <limits>
23 #include <ostream>
24 
25 #include "histogram.h"
26 
27 #include <android-base/logging.h>
28 
29 #include "bit_utils.h"
30 #include "time_utils.h"
31 #include "utils.h"
32 
33 namespace art {
34 
AddValue(Value value)35 template <class Value> inline void Histogram<Value>::AddValue(Value value) {
36   CHECK_GE(value, static_cast<Value>(0));
37   if (value >= max_) {
38     Value new_max = ((value + 1) / bucket_width_ + 1) * bucket_width_;
39     DCHECK_GT(new_max, max_);
40     GrowBuckets(new_max);
41   }
42   BucketiseValue(value);
43 }
44 
AdjustAndAddValue(Value value)45 template <class Value> inline void Histogram<Value>::AdjustAndAddValue(Value value) {
46   AddValue(value / kAdjust);
47 }
48 
// Name-only constructor.
//
// Sets the name and zeroes kAdjust, kInitialBucketCount, max_buckets_ and
// sample_size_. It does not call Reset(), so no buckets are created and the
// remaining statistics members are not assigned here.
// NOTE(review): kAdjust is 0 for instances built this way, and
// AdjustAndAddValue() divides by kAdjust; presumably such instances are never
// fed samples - confirm against callers.
template <class Value> inline Histogram<Value>::Histogram(const char* name)
    : kAdjust(0),
      kInitialBucketCount(0),
      name_(name),
      max_buckets_(0),
      sample_size_(0) {
}
56 
// Full constructor.
//
// `name` is stored as the histogram's name, `initial_bucket_width` becomes the
// starting bucket width and `max_buckets` caps the number of buckets before
// GrowBuckets() starts merging adjacent pairs.
//
// `max_buckets` must be at least kInitialBucketCount (which is set to
// kMinBuckets here) and must be even; both are enforced with CHECKs.
template <class Value>
inline Histogram<Value>::Histogram(const char* name, Value initial_bucket_width,
                                   size_t max_buckets)
    : kAdjust(1000),
      kInitialBucketCount(kMinBuckets),
      name_(name),
      max_buckets_(max_buckets),
      bucket_width_(initial_bucket_width) {
  CHECK_GE(max_buckets, kInitialBucketCount);
  // An even cap is required because GrowBuckets() merges buckets in pairs.
  CHECK_EQ(max_buckets_ % 2, 0u);
  // Zero the statistics and create the initial buckets.
  Reset();
}
69 
70 template <class Value>
GrowBuckets(Value new_max)71 inline void Histogram<Value>::GrowBuckets(Value new_max) {
72   while (max_ < new_max) {
73     // If we have reached the maximum number of buckets, merge buckets together.
74     DCHECK_LE(frequency_.size(), max_buckets_);
75     if (frequency_.size() == max_buckets_) {
76       DCHECK_EQ(frequency_.size() % 2, 0u);
77       // We double the width of each bucket to reduce the number of buckets by a factor of 2.
78       bucket_width_ *= 2;
79       const size_t limit = frequency_.size() / 2;
80       // Merge the frequencies by adding each adjacent two together.
81       for (size_t i = 0; i < limit; ++i) {
82         frequency_[i] = frequency_[i * 2] + frequency_[i * 2 + 1];
83       }
84       // Remove frequencies in the second half of the array which were added to the first half.
85       while (frequency_.size() > limit) {
86         frequency_.pop_back();
87       }
88     }
89     max_ += bucket_width_;
90     frequency_.push_back(0);
91   }
92 }
93 
FindBucket(Value val)94 template <class Value> inline size_t Histogram<Value>::FindBucket(Value val) const {
95   // Since this is only a linear histogram, bucket index can be found simply with
96   // dividing the value by the bucket width.
97   DCHECK_GE(val, min_);
98   DCHECK_LE(val, max_);
99   const size_t bucket_idx = static_cast<size_t>((val - min_) / bucket_width_);
100   DCHECK_GE(bucket_idx, 0ul);
101   DCHECK_LE(bucket_idx, GetBucketCount());
102   return bucket_idx;
103 }
104 
105 template <class Value>
BucketiseValue(Value val)106 inline void Histogram<Value>::BucketiseValue(Value val) {
107   CHECK_LT(val, max_);
108   sum_ += val;
109   sum_of_squares_ += val * val;
110   ++sample_size_;
111   ++frequency_[FindBucket(val)];
112   max_value_added_ = std::max(val, max_value_added_);
113   min_value_added_ = std::min(val, min_value_added_);
114 }
115 
Initialize()116 template <class Value> inline void Histogram<Value>::Initialize() {
117   for (size_t idx = 0; idx < kInitialBucketCount; idx++) {
118     frequency_.push_back(0);
119   }
120   // Cumulative frequency and ranges has a length of 1 over frequency.
121   max_ = bucket_width_ * GetBucketCount();
122 }
123 
GetBucketCount()124 template <class Value> inline size_t Histogram<Value>::GetBucketCount() const {
125   return frequency_.size();
126 }
127 
Reset()128 template <class Value> inline void Histogram<Value>::Reset() {
129   sum_of_squares_ = 0;
130   sample_size_ = 0;
131   min_ = 0;
132   sum_ = 0;
133   min_value_added_ = std::numeric_limits<Value>::max();
134   max_value_added_ = std::numeric_limits<Value>::min();
135   frequency_.clear();
136   Initialize();
137 }
138 
GetRange(size_t bucket_idx)139 template <class Value> inline Value Histogram<Value>::GetRange(size_t bucket_idx) const {
140   DCHECK_LE(bucket_idx, GetBucketCount());
141   return min_ + bucket_idx * bucket_width_;
142 }
143 
Mean()144 template <class Value> inline double Histogram<Value>::Mean() const {
145   DCHECK_GT(sample_size_, 0ull);
146   return static_cast<double>(sum_) / static_cast<double>(sample_size_);
147 }
148 
Variance()149 template <class Value> inline double Histogram<Value>::Variance() const {
150   DCHECK_GT(sample_size_, 0ull);
151   // Using algorithms for calculating variance over a population:
152   // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
153   Value sum_squared = sum_ * sum_;
154   double sum_squared_by_n_squared =
155       static_cast<double>(sum_squared) /
156       static_cast<double>(sample_size_ * sample_size_);
157   double sum_of_squares_by_n =
158       static_cast<double>(sum_of_squares_) / static_cast<double>(sample_size_);
159   return sum_of_squares_by_n - sum_squared_by_n_squared;
160 }
161 
162 template <class Value>
PrintBins(std::ostream & os,const CumulativeData & data)163 inline void Histogram<Value>::PrintBins(std::ostream& os, const CumulativeData& data) const {
164   DCHECK_GT(sample_size_, 0ull);
165   for (size_t bin_idx = 0; bin_idx < data.freq_.size(); ++bin_idx) {
166     if (bin_idx > 0 && data.perc_[bin_idx] == data.perc_[bin_idx - 1]) {
167       bin_idx++;
168       continue;
169     }
170     os << GetRange(bin_idx) << ": " << data.freq_[bin_idx] << "\t"
171        << data.perc_[bin_idx] * 100.0 << "%\n";
172   }
173 }
174 
175 template <class Value>
DumpBins(std::ostream & os)176 inline void Histogram<Value>::DumpBins(std::ostream& os) const {
177   DCHECK_GT(sample_size_, 0ull);
178   bool dumped_one = false;
179   for (size_t bin_idx = 0; bin_idx < frequency_.size(); ++bin_idx) {
180     if (frequency_[bin_idx] != 0U) {
181       if (dumped_one) {
182         // Prepend a comma if not the first bin.
183         os << ",";
184       } else {
185         dumped_one = true;
186       }
187       os << GetRange(bin_idx) << ":" << frequency_[bin_idx];
188     }
189   }
190 }
191 
192 template <class Value>
PrintConfidenceIntervals(std::ostream & os,double interval,const CumulativeData & data)193 inline void Histogram<Value>::PrintConfidenceIntervals(std::ostream &os, double interval,
194                                                        const CumulativeData& data) const {
195   static constexpr size_t kFractionalDigits = 3;
196   DCHECK_GT(interval, 0);
197   DCHECK_LT(interval, 1.0);
198   const double per_0 = (1.0 - interval) / 2.0;
199   const double per_1 = per_0 + interval;
200   const TimeUnit unit = GetAppropriateTimeUnit(Mean() * kAdjust);
201   os << Name() << ":\tSum: " << PrettyDuration(Sum() * kAdjust) << " "
202      << (interval * 100) << "% C.I. " << FormatDuration(Percentile(per_0, data) * kAdjust, unit,
203                                                         kFractionalDigits)
204      << "-" << FormatDuration(Percentile(per_1, data) * kAdjust, unit, kFractionalDigits) << " "
205      << "Avg: " << FormatDuration(Mean() * kAdjust, unit, kFractionalDigits) << " Max: "
206      << FormatDuration(Max() * kAdjust, unit, kFractionalDigits) << std::endl;
207 }
208 
209 template <class Value>
PrintMemoryUse(std::ostream & os)210 inline void Histogram<Value>::PrintMemoryUse(std::ostream &os) const {
211   os << Name();
212   if (sample_size_ != 0u) {
213     os << ": Avg: " << PrettySize(Mean()) << " Max: "
214        << PrettySize(Max()) << " Min: " << PrettySize(Min()) << "\n";
215   } else {
216     os << ": <no data>\n";
217   }
218 }
219 
220 template <class Value>
CreateHistogram(CumulativeData * out_data)221 inline void Histogram<Value>::CreateHistogram(CumulativeData* out_data) const {
222   DCHECK_GT(sample_size_, 0ull);
223   out_data->freq_.clear();
224   out_data->perc_.clear();
225   uint64_t accumulated = 0;
226   out_data->freq_.push_back(accumulated);
227   out_data->perc_.push_back(0.0);
228   for (size_t idx = 0; idx < frequency_.size(); idx++) {
229     accumulated += frequency_[idx];
230     out_data->freq_.push_back(accumulated);
231     out_data->perc_.push_back(static_cast<double>(accumulated) / static_cast<double>(sample_size_));
232   }
233   DCHECK_EQ(out_data->freq_.back(), sample_size_);
234   DCHECK_LE(std::abs(out_data->perc_.back() - 1.0), 0.001);
235 }
236 
237 #pragma clang diagnostic push
238 #pragma clang diagnostic ignored "-Wfloat-equal"
239 
240 template <class Value>
Percentile(double per,const CumulativeData & data)241 inline double Histogram<Value>::Percentile(double per, const CumulativeData& data) const {
242   DCHECK_GT(data.perc_.size(), 0ull);
243   size_t upper_idx = 0, lower_idx = 0;
244   for (size_t idx = 0; idx < data.perc_.size(); idx++) {
245     if (per <= data.perc_[idx]) {
246       upper_idx = idx;
247       break;
248     }
249 
250     if (per >= data.perc_[idx] && idx != 0 && data.perc_[idx] != data.perc_[idx - 1]) {
251       lower_idx = idx;
252     }
253   }
254 
255   const double lower_perc = data.perc_[lower_idx];
256   const double lower_value = static_cast<double>(GetRange(lower_idx));
257   if (per == lower_perc) {
258     return lower_value;
259   }
260 
261   const double upper_perc = data.perc_[upper_idx];
262   const double upper_value = static_cast<double>(GetRange(upper_idx));
263   if (per == upper_perc) {
264     return upper_value;
265   }
266   DCHECK_GT(upper_perc, lower_perc);
267 
268   double value = lower_value + (upper_value - lower_value) *
269                                (per - lower_perc) / (upper_perc - lower_perc);
270 
271   if (value < min_value_added_) {
272     value = min_value_added_;
273   } else if (value > max_value_added_) {
274     value = max_value_added_;
275   }
276 
277   return value;
278 }
279 
280 #pragma clang diagnostic pop
281 
282 }  // namespace art
283 #endif  // ART_LIBARTBASE_BASE_HISTOGRAM_INL_H_
284