1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef TNT_UTILS_PROFILER_H
18 #define TNT_UTILS_PROFILER_H
19 
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <array>
#include <chrono>

#if defined(__linux__)
#   include <unistd.h>
#   include <sys/ioctl.h>
#   include <linux/perf_event.h>
#endif
31 
32 namespace utils {
33 
34 class Profiler {
35     enum {
36         INSTRUCTIONS    = 0,   // must be zero
37         CPU_CYCLES      = 1,
38         DCACHE_REFS     = 2,
39         DCACHE_MISSES   = 3,
40         BRANCHES        = 4,
41         BRANCH_MISSES   = 5,
42         ICACHE_REFS     = 6,
43         ICACHE_MISSES   = 7,
44 
45         // Must be last one
46         EVENT_COUNT
47     };
48 
49 public:
50 
51     enum {
52         EV_CPU_CYCLES = 1 << CPU_CYCLES,
53         EV_L1D_REFS   = 1 << DCACHE_REFS,
54         EV_L1D_MISSES = 1 << DCACHE_MISSES,
55         EV_BPU_REFS   = 1 << BRANCHES,
56         EV_BPU_MISSES = 1 << BRANCH_MISSES,
57         EV_L1I_REFS   = 1 << ICACHE_REFS,
58         EV_L1I_MISSES = 1 << ICACHE_MISSES,
59         // helpers
60         EV_L1D_RATES = EV_L1D_REFS | EV_L1D_MISSES,
61         EV_L1I_RATES = EV_L1I_REFS | EV_L1I_MISSES,
62         EV_BPU_RATES = EV_BPU_REFS | EV_BPU_MISSES,
63     };
64 
65     static Profiler& get() noexcept;
66 
67 
68     Profiler(const Profiler& rhs) = delete;
69     Profiler(Profiler&& rhs) = delete;
70     Profiler& operator=(const Profiler& rhs) = delete;
71     Profiler& operator=(Profiler&& rhs) = delete;
72 
73     // selects which events are enabled.
74     // By Default: EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES
75     uint32_t resetEvents(uint32_t eventMask) noexcept;
76 
getEnabledEvents()77     uint32_t getEnabledEvents() const noexcept { return mEnabledEvents; }
78 
79     // could return false if performance counters are not supported/enabled
isValid()80     bool isValid() const { return mCountersFd[0] >= 0; }
81 
82     class Counters {
83         friend class Profiler;
84         uint64_t nr;
85         uint64_t time_enabled;
86         uint64_t time_running;
87         struct {
88             uint64_t value;
89             uint64_t id;
90         } counters[Profiler::EVENT_COUNT];
91 
92         friend Counters operator-(Counters lhs, const Counters& rhs) noexcept {
93             lhs.nr -= rhs.nr;
94             lhs.time_enabled -= rhs.time_enabled;
95             lhs.time_running -= rhs.time_running;
96             for (size_t i=0 ; i<EVENT_COUNT ; ++i) {
97                 lhs.counters[i].value -= rhs.counters[i].value;
98             }
99             return lhs;
100         }
101 
102     public:
getInstructions()103         uint64_t getInstructions() const        { return counters[INSTRUCTIONS].value; }
getCpuCycles()104         uint64_t getCpuCycles() const           { return counters[CPU_CYCLES].value; }
getL1DReferences()105         uint64_t getL1DReferences() const       { return counters[DCACHE_REFS].value; }
getL1DMisses()106         uint64_t getL1DMisses() const           { return counters[DCACHE_MISSES].value; }
getL1IReferences()107         uint64_t getL1IReferences() const       { return counters[ICACHE_REFS].value; }
getL1IMisses()108         uint64_t getL1IMisses() const           { return counters[ICACHE_MISSES].value; }
getBranchInstructions()109         uint64_t getBranchInstructions() const  { return counters[BRANCHES].value; }
getBranchMisses()110         uint64_t getBranchMisses() const        { return counters[BRANCH_MISSES].value; }
111 
getWallTime()112         std::chrono::duration<uint64_t, std::nano> getWallTime() const {
113             return std::chrono::duration<uint64_t, std::nano>(time_enabled);
114         }
115 
getRunningTime()116         std::chrono::duration<uint64_t, std::nano> getRunningTime() const {
117             return std::chrono::duration<uint64_t, std::nano>(time_running);
118         }
119 
getIPC()120         double getIPC() const noexcept {
121             uint64_t cpuCycles = getCpuCycles();
122             uint64_t instructions = getInstructions();
123             return double(instructions) / double(cpuCycles);
124         }
125 
getCPI()126         double getCPI() const noexcept {
127             uint64_t cpuCycles = getCpuCycles();
128             uint64_t instructions = getInstructions();
129             return double(cpuCycles) / double(instructions);
130         }
131 
getL1DMissRate()132         double getL1DMissRate() const noexcept {
133             uint64_t cacheReferences = getL1DReferences();
134             uint64_t cacheMisses = getL1DMisses();
135             return double(cacheMisses) / double(cacheReferences);
136         }
137 
getL1DHitRate()138         double getL1DHitRate() const noexcept {
139             return 1.0 - getL1DMissRate();
140         }
141 
getL1IMissRate()142         double getL1IMissRate() const noexcept {
143             uint64_t cacheReferences = getL1IReferences();
144             uint64_t cacheMisses = getL1IMisses();
145             return double(cacheMisses) / double(cacheReferences);
146         }
147 
getL1IHitRate()148         double getL1IHitRate() const noexcept {
149             return 1.0 - getL1IMissRate();
150         }
151 
getBranchMissRate()152         double getBranchMissRate() const noexcept {
153             uint64_t branchReferences = getBranchInstructions();
154             uint64_t branchMisses = getBranchMisses();
155             return double(branchMisses) / double(branchReferences);
156         }
157 
getBranchHitRate()158         double getBranchHitRate() const noexcept {
159             return 1.0 - getBranchMissRate();
160         }
161 
getMPKI(uint64_t misses)162         double getMPKI(uint64_t misses) const noexcept {
163             return (misses * 1000.0) / getInstructions();
164         }
165 
166     };
167 
168 #if defined(__linux__)
169 
reset()170     void reset() noexcept {
171         int fd = mCountersFd[0];
172         ioctl(fd, PERF_EVENT_IOC_RESET,  PERF_IOC_FLAG_GROUP);
173     }
174 
start()175     void start() noexcept {
176         int fd = mCountersFd[0];
177         ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
178     }
179 
stop()180     void stop() noexcept {
181         int fd = mCountersFd[0];
182         ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
183     }
184 
readCounters(Counters * outCounters)185     void readCounters(Counters* outCounters) noexcept {
186         Counters counters;
187         ssize_t n = read(mCountersFd[0], &counters, sizeof(Counters));
188         memset(outCounters, 0, sizeof(Counters));
189         if (n > 0) {
190             outCounters->nr = counters.nr;
191             outCounters->time_enabled = counters.time_enabled;
192             outCounters->time_running = counters.time_running;
193             for (size_t i=0 ; i<size_t(EVENT_COUNT) ; i++) {
194                 if (mCountersFd[i] >= 0) {
195                     outCounters->counters[i] = counters.counters[mIds[i]];
196                 }
197             }
198         }
199     }
200 
201 #else // !__linux__
202 
reset()203     void reset() noexcept { }
start()204     void start() noexcept { }
stop()205     void stop() noexcept { }
readCounters(Counters * counters)206     void readCounters(Counters* counters) noexcept { }
207 
208 #endif // __linux__
209 
hasBranchRates()210     bool hasBranchRates() const noexcept {
211         return (mCountersFd[BRANCHES] >= 0) && (mCountersFd[BRANCH_MISSES] >= 0);
212     }
213 
hasICacheRates()214     bool hasICacheRates() const noexcept {
215         return (mCountersFd[ICACHE_REFS] >= 0) && (mCountersFd[ICACHE_MISSES] >= 0);
216     }
217 
218 private:
219     Profiler() noexcept;
220     ~Profiler() noexcept;
221 
222     std::array<uint8_t, EVENT_COUNT> mIds;
223     std::array<int, EVENT_COUNT> mCountersFd;
224     uint32_t mEnabledEvents = 0;
225 };
226 
227 } // namespace utils
228 
229 #endif // TNT_UTILS_PROFILER_H
230