/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TNT_UTILS_PROFILER_H
#define TNT_UTILS_PROFILER_H

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <array>
#include <chrono>

#if defined(__linux__)
#   include <unistd.h>
#   include <sys/ioctl.h>
#   include <linux/perf_event.h>
#endif

namespace utils {

/*
 * Access to a group of performance counters, each backed by a file descriptor.
 *
 * On Linux the counters are driven with perf-event ioctls and read with read().
 * On other platforms reset()/start()/stop() do nothing and readCounters() yields
 * all-zero counters.
 *
 * Callers cannot construct, copy or move a Profiler; use Profiler::get().
 */
class Profiler {
    // Slot of each event in mIds / mCountersFd, and bit position of the matching
    // public EV_* flag.
    enum {
        INSTRUCTIONS    = 0,    // must be zero
        CPU_CYCLES      = 1,
        DCACHE_REFS     = 2,
        DCACHE_MISSES   = 3,
        BRANCHES        = 4,
        BRANCH_MISSES   = 5,
        ICACHE_REFS     = 6,
        ICACHE_MISSES   = 7,

        // Must be last one
        EVENT_COUNT
    };

public:

    // Bit flags for resetEvents() / getEnabledEvents().
    enum {
        EV_CPU_CYCLES = 1 << CPU_CYCLES,
        EV_L1D_REFS   = 1 << DCACHE_REFS,
        EV_L1D_MISSES = 1 << DCACHE_MISSES,
        EV_BPU_REFS   = 1 << BRANCHES,
        EV_BPU_MISSES = 1 << BRANCH_MISSES,
        EV_L1I_REFS   = 1 << ICACHE_REFS,
        EV_L1I_MISSES = 1 << ICACHE_MISSES,
        // helpers
        EV_L1D_RATES = EV_L1D_REFS | EV_L1D_MISSES,
        EV_L1I_RATES = EV_L1I_REFS | EV_L1I_MISSES,
        EV_BPU_RATES = EV_BPU_REFS | EV_BPU_MISSES,
    };

    static Profiler& get() noexcept;

    Profiler(const Profiler& rhs) = delete;
    Profiler(Profiler&& rhs) = delete;
    Profiler& operator=(const Profiler& rhs) = delete;
    Profiler& operator=(Profiler&& rhs) = delete;

    // selects which events are enabled.
    // By Default: EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES
    uint32_t resetEvents(uint32_t eventMask) noexcept;

    // returns the mask of currently enabled events (a combination of EV_* flags)
    uint32_t getEnabledEvents() const noexcept { return mEnabledEvents; }

    // could return false if performance counters are not supported/enabled
    bool isValid() const { return mCountersFd[0] >= 0; }

    /*
     * A snapshot of the counters.
     *
     * readCounters() read()s straight into an object of this type, so the order
     * and types of the data members must not change. A default-constructed
     * Counters is all zeros.
     */
    class Counters {
        friend class Profiler;
        uint64_t nr = 0;
        uint64_t time_enabled = 0;
        uint64_t time_running = 0;
        struct {
            uint64_t value;
            uint64_t id;
        } counters[Profiler::EVENT_COUNT] = {};

        // Difference of two snapshots: nr, both times and every counter value are
        // subtracted; the ids of `lhs` are kept.
        friend Counters operator-(Counters lhs, const Counters& rhs) noexcept {
            lhs.nr -= rhs.nr;
            lhs.time_enabled -= rhs.time_enabled;
            lhs.time_running -= rhs.time_running;
            size_t i = 0;
            for (auto& counter : lhs.counters) {
                counter.value -= rhs.counters[i++].value;
            }
            return lhs;
        }

    public:
        uint64_t getInstructions() const        { return counters[INSTRUCTIONS].value; }
        uint64_t getCpuCycles() const           { return counters[CPU_CYCLES].value; }
        uint64_t getL1DReferences() const       { return counters[DCACHE_REFS].value; }
        uint64_t getL1DMisses() const           { return counters[DCACHE_MISSES].value; }
        uint64_t getL1IReferences() const       { return counters[ICACHE_REFS].value; }
        uint64_t getL1IMisses() const           { return counters[ICACHE_MISSES].value; }
        uint64_t getBranchInstructions() const  { return counters[BRANCHES].value; }
        uint64_t getBranchMisses() const        { return counters[BRANCH_MISSES].value; }

        // time_enabled, expressed as a nanosecond duration
        std::chrono::duration<uint64_t, std::nano> getWallTime() const {
            return std::chrono::duration<uint64_t, std::nano>(time_enabled);
        }

        // time_running, expressed as a nanosecond duration
        std::chrono::duration<uint64_t, std::nano> getRunningTime() const {
            return std::chrono::duration<uint64_t, std::nano>(time_running);
        }

        // NOTE: none of the ratios below guard against a zero denominator; with
        // IEEE-754 doubles the result is then inf or NaN, so check the
        // corresponding counter first when it may be zero.

        // instructions per cycle
        double getIPC() const noexcept {
            uint64_t cpuCycles = getCpuCycles();
            uint64_t instructions = getInstructions();
            return double(instructions) / double(cpuCycles);
        }

        // cycles per instruction
        double getCPI() const noexcept {
            uint64_t cpuCycles = getCpuCycles();
            uint64_t instructions = getInstructions();
            return double(cpuCycles) / double(instructions);
        }

        // L1D misses / L1D references
        double getL1DMissRate() const noexcept {
            uint64_t cacheReferences = getL1DReferences();
            uint64_t cacheMisses = getL1DMisses();
            return double(cacheMisses) / double(cacheReferences);
        }

        double getL1DHitRate() const noexcept {
            return 1.0 - getL1DMissRate();
        }

        // L1I misses / L1I references
        double getL1IMissRate() const noexcept {
            uint64_t cacheReferences = getL1IReferences();
            uint64_t cacheMisses = getL1IMisses();
            return double(cacheMisses) / double(cacheReferences);
        }

        double getL1IHitRate() const noexcept {
            return 1.0 - getL1IMissRate();
        }

        // branch misses / branch instructions
        double getBranchMissRate() const noexcept {
            uint64_t branchReferences = getBranchInstructions();
            uint64_t branchMisses = getBranchMisses();
            return double(branchMisses) / double(branchReferences);
        }

        double getBranchHitRate() const noexcept {
            return 1.0 - getBranchMissRate();
        }

        // `misses` per thousand instructions
        double getMPKI(uint64_t misses) const noexcept {
            return (misses * 1000.0) / getInstructions();
        }
    };

#if defined(__linux__)

    // The three calls below act on the whole group (PERF_IOC_FLAG_GROUP) through
    // the descriptor stored in slot 0.

    void reset() noexcept {
        int fd = mCountersFd[0];
        ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
    }

    void start() noexcept {
        int fd = mCountersFd[0];
        ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
    }

    void stop() noexcept {
        int fd = mCountersFd[0];
        ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
    }

    /*
     * Reads the current counters into *outCounters.
     *
     * *outCounters is always cleared first. If read() on the slot-0 descriptor
     * returns data, nr and both times are copied, and for every event whose
     * descriptor is valid (>= 0) the entry found at index mIds[event] of the raw
     * data is stored at the event's own slot. A null outCounters is ignored.
     */
    void readCounters(Counters* outCounters) noexcept {
        if (!outCounters) {
            return;
        }
        Counters raw;   // zero-initialized, so a short read never exposes garbage
        ssize_t n = read(mCountersFd[0], &raw, sizeof(Counters));
        *outCounters = Counters{};
        if (n > 0) {
            outCounters->nr = raw.nr;
            outCounters->time_enabled = raw.time_enabled;
            outCounters->time_running = raw.time_running;
            for (size_t i = 0; i < size_t(EVENT_COUNT); i++) {
                if (mCountersFd[i] >= 0) {
                    outCounters->counters[i] = raw.counters[mIds[i]];
                }
            }
        }
    }

#else // !__linux__

    void reset() noexcept { }
    void start() noexcept { }
    void stop() noexcept { }

    // No counters on this platform: report all zeros, like the Linux version does
    // when nothing could be read. A null pointer is ignored.
    void readCounters(Counters* counters) noexcept {
        if (counters) {
            *counters = Counters{};
        }
    }

#endif // __linux__

    // true when both the branch and the branch-miss descriptors are valid (>= 0)
    bool hasBranchRates() const noexcept {
        return (mCountersFd[BRANCHES] >= 0) && (mCountersFd[BRANCH_MISSES] >= 0);
    }

    // true when both the L1I reference and L1I miss descriptors are valid (>= 0)
    bool hasICacheRates() const noexcept {
        return (mCountersFd[ICACHE_REFS] >= 0) && (mCountersFd[ICACHE_MISSES] >= 0);
    }

private:
    Profiler() noexcept;
    ~Profiler() noexcept;

    // per event: index of its entry in the data returned by read()
    std::array<uint8_t, EVENT_COUNT> mIds;
    // per event: its file descriptor, negative when the event is unavailable
    std::array<int, EVENT_COUNT> mCountersFd;
    uint32_t mEnabledEvents = 0;
};

} // namespace utils

#endif // TNT_UTILS_PROFILER_H