1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef ART_LIBARTBASE_BASE_HISTOGRAM_H_
17 #define ART_LIBARTBASE_BASE_HISTOGRAM_H_
18 
19 #include <string>
20 #include <vector>
21 
22 #include <android-base/macros.h>
23 
24 namespace art {
25 
26 // Creates a data histogram  for a better understanding of statistical data.
27 // Histogram analysis goes beyond simple mean and standard deviation to provide
28 // percentiles values, describing where the $% of the input data lies.
29 // Designed to be simple and used with timing logger in art.
30 
31 template <class Value> class Histogram {
32   const double kAdjust;
33   const size_t kInitialBucketCount;
34 
35  public:
36   class CumulativeData {
37     friend class Histogram<Value>;
38     std::vector<uint64_t> freq_;
39     std::vector<double> perc_;
40   };
41 
42   // Minimum and initial number of allocated buckets in histogram.
43   static constexpr size_t kMinBuckets = 8;
44   // Used by the cumulative timing logger to search the histogram set using for an existing split
45   // with the same name using CumulativeLogger::HistogramComparator.
46   explicit Histogram(const char* name);
47   // This is the expected constructor when creating new Histograms. Max_buckets must be even.
48   // Max_buckets, if specified, must be at least kMinBuckets.
49   Histogram(const char* name, Value initial_bucket_width, size_t max_buckets = 100);
50   void AddValue(Value);
51   void AdjustAndAddValue(Value);  // Add a value after dividing it by kAdjust.
52   // Builds the cumulative distribution function from the frequency data.
53   // Accumulative summation of frequencies.
54   // cumulative_freq[i] = sum(frequency[j] : 0 < j < i )
55   // Accumulative summation of percentiles; which is the frequency / SampleSize
56   // cumulative_perc[i] = sum(frequency[j] / SampleSize : 0 < j < i )
57   void CreateHistogram(CumulativeData* data) const;
58   // Reset the cumulative values, next time CreateHistogram is called it will recreate the cache.
59   void Reset();
60   double Mean() const;
61   double Variance() const;
62   double Percentile(double per, const CumulativeData& data) const;
63   void PrintConfidenceIntervals(std::ostream& os, double interval,
64                                 const CumulativeData& data) const;
65   void PrintMemoryUse(std::ostream& os) const;
66   void PrintBins(std::ostream& os, const CumulativeData& data) const;
67   void DumpBins(std::ostream& os) const;
68   Value GetRange(size_t bucket_idx) const;
69   size_t GetBucketCount() const;
70 
SampleSize()71   uint64_t SampleSize() const {
72     return sample_size_;
73   }
74 
Sum()75   Value Sum() const {
76     return sum_;
77   }
78 
AdjustedSum()79   Value AdjustedSum() const {
80     return sum_ * kAdjust;
81   }
82 
Min()83   Value Min() const {
84     return min_value_added_;
85   }
86 
Max()87   Value Max() const {
88     return max_value_added_;
89   }
90 
BucketWidth()91   Value BucketWidth() const {
92     return bucket_width_;
93   }
94 
Name()95   const std::string& Name() const {
96     return name_;
97   }
98 
99  private:
100   void Initialize();
101   size_t FindBucket(Value val) const;
102   void BucketiseValue(Value val);
103   // Add more buckets to the histogram to fill in a new value that exceeded
104   // the max_read_value_.
105   void GrowBuckets(Value val);
106   std::string name_;
107   // Maximum number of buckets.
108   const size_t max_buckets_;
109   // Number of samples placed in histogram.
110   size_t sample_size_;
111   // Width of the bucket range. The lower the value is the more accurate
112   // histogram percentiles are. Grows adaptively when we hit max buckets.
113   Value bucket_width_;
114   // How many occurrences of values fall within a bucket at index i where i covers the range
115   // starting at  min_ + i * bucket_width_ with size bucket_size_.
116   std::vector<uint32_t> frequency_;
117   // Summation of all the elements inputed by the user.
118   Value sum_;
119   // Minimum value that can fit in the histogram. Fixed to zero for now.
120   Value min_;
121   // Maximum value that can fit in the histogram, grows adaptively.
122   Value max_;
123   // Summation of the values entered. Used to calculate variance.
124   Value sum_of_squares_;
125   // Maximum value entered in the histogram.
126   Value min_value_added_;
127   // Minimum value entered in the histogram.
128   Value max_value_added_;
129 
130   DISALLOW_COPY_AND_ASSIGN(Histogram);
131 };
132 }  // namespace art
133 
134 #endif  // ART_LIBARTBASE_BASE_HISTOGRAM_H_
135