1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Our goal is to measure the cost of various C++ atomic operations.
18 // Android doesn't really control those. But since some of these operations can be quite
19 // expensive, this may be useful input for development of higher level code.
20 // Expected mappings from C++ atomics to hardware primitives can be found at
21 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html .
22 
23 #include <atomic>
24 #include <mutex>
25 
26 #include <benchmark/benchmark.h>
27 #include "util.h"
28 
29 // We time atomic operations separated by a volatile (not atomic!) increment.  This ensures
30 // that the compiler emits memory instructions (e.g. load or store) prior to any fence or the
31 // like.  That in turn ensures that the CPU has outstanding memory operations when the fence
32 // is executed.
33 
34 // In most respects, we compute best case values. Since there is only one thread, there are no
35 // coherence misses.
36 
37 // We assume that the compiler is not smart enough to optimize away fences in a single-threaded
38 // program. If that changes, we'll need to add a second thread.
39 
// Incremented between timed atomic operations. Because it is volatile, the
// compiler must emit a real memory access per iteration, ensuring the CPU has
// an outstanding memory operation when the timed atomic op or fence executes.
static volatile unsigned counter;

// The shared atomic location every benchmark loads from / stores to.
// NOTE(review): non-static (external linkage), unlike the other globals here —
// presumably deliberate, to keep the compiler from reasoning about its value.
std::atomic<int> test_loc(0);

// Accumulated results are written here so the timed loads are observable
// side effects and cannot be eliminated as dead code.
static volatile unsigned sink;

// Mutex for the critical-section comparison benchmark (BM_atomic_fetch_add_cs).
static std::mutex mtx;
47 
BM_atomic_empty(benchmark::State & state)48 void BM_atomic_empty(benchmark::State& state) {
49   while (state.KeepRunning()) {
50     ++counter;
51   }
52 }
53 BIONIC_BENCHMARK(BM_atomic_empty);
54 
BM_atomic_load_relaxed(benchmark::State & state)55 static void BM_atomic_load_relaxed(benchmark::State& state) {
56   unsigned result = 0;
57   while (state.KeepRunning()) {
58     result += test_loc.load(std::memory_order_relaxed);
59     ++counter;
60   }
61   sink = result;
62 }
63 BIONIC_BENCHMARK(BM_atomic_load_relaxed);
64 
BM_atomic_load_acquire(benchmark::State & state)65 static void BM_atomic_load_acquire(benchmark::State& state) {
66   unsigned result = 0;
67   while (state.KeepRunning()) {
68     result += test_loc.load(std::memory_order_acquire);
69     ++counter;
70   }
71   sink = result;
72 }
73 BIONIC_BENCHMARK(BM_atomic_load_acquire);
74 
BM_atomic_store_release(benchmark::State & state)75 static void BM_atomic_store_release(benchmark::State& state) {
76   int i = counter;
77   while (state.KeepRunning()) {
78     test_loc.store(++i, std::memory_order_release);
79     ++counter;
80   }
81 }
82 BIONIC_BENCHMARK(BM_atomic_store_release);
83 
BM_atomic_store_seq_cst(benchmark::State & state)84 static void BM_atomic_store_seq_cst(benchmark::State& state) {
85   int i = counter;
86   while (state.KeepRunning()) {
87     test_loc.store(++i, std::memory_order_seq_cst);
88     ++counter;
89   }
90 }
91 BIONIC_BENCHMARK(BM_atomic_store_seq_cst);
92 
BM_atomic_fetch_add_relaxed(benchmark::State & state)93 static void BM_atomic_fetch_add_relaxed(benchmark::State& state) {
94   unsigned result = 0;
95   while (state.KeepRunning()) {
96     result += test_loc.fetch_add(1, std::memory_order_relaxed);
97     ++counter;
98   }
99   sink = result;
100 }
101 BIONIC_BENCHMARK(BM_atomic_fetch_add_relaxed);
102 
BM_atomic_fetch_add_seq_cst(benchmark::State & state)103 static void BM_atomic_fetch_add_seq_cst(benchmark::State& state) {
104   unsigned result = 0;
105   while (state.KeepRunning()) {
106     result += test_loc.fetch_add(1, std::memory_order_seq_cst);
107     ++counter;
108   }
109   sink = result;
110 }
111 BIONIC_BENCHMARK(BM_atomic_fetch_add_seq_cst);
112 
113 // The fence benchmarks include a relaxed load to make it much harder to optimize away
114 // the fence.
115 
BM_atomic_acquire_fence(benchmark::State & state)116 static void BM_atomic_acquire_fence(benchmark::State& state) {
117   unsigned result = 0;
118   while (state.KeepRunning()) {
119     result += test_loc.load(std::memory_order_relaxed);
120     std::atomic_thread_fence(std::memory_order_acquire);
121     ++counter;
122   }
123   sink = result;
124 }
125 BIONIC_BENCHMARK(BM_atomic_acquire_fence);
126 
BM_atomic_seq_cst_fence(benchmark::State & state)127 static void BM_atomic_seq_cst_fence(benchmark::State& state) {
128   unsigned result = 0;
129   while (state.KeepRunning()) {
130     result += test_loc.load(std::memory_order_relaxed);
131     std::atomic_thread_fence(std::memory_order_seq_cst);
132     ++counter;
133   }
134   sink = result;
135 }
136 BIONIC_BENCHMARK(BM_atomic_seq_cst_fence);
137 
138 // For comparison, also throw in a critical section version:
139 
BM_atomic_fetch_add_cs(benchmark::State & state)140 static void BM_atomic_fetch_add_cs(benchmark::State& state) {
141   unsigned result = 0;
142   while (state.KeepRunning()) {
143     {
144       std::lock_guard<std::mutex> _(mtx);
145       result += ++counter;
146     }
147   }
148   sink = result;
149 }
150 BIONIC_BENCHMARK(BM_atomic_fetch_add_cs);
151