1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <android/log.h>
18 #include <math.h>
19 #include <stdlib.h>
20 #include <unistd.h>
21
22 #include "Bench.h"
23
24
Bench()25 Bench::Bench()
26 {
27 mTimeBucket = NULL;
28 mTimeBuckets = 0;
29 mTimeBucketDivisor = 1;
30
31 mMemLatencyLastSize = 0;
32 mMemDst = NULL;
33 mMemSrc = NULL;
34 mMemLoopCount = 0;
35 }
36
37
~Bench()38 Bench::~Bench()
39 {
40 }
41
getTimeNanos() const42 uint64_t Bench::getTimeNanos() const
43 {
44 struct timespec t;
45 clock_gettime(CLOCK_MONOTONIC, &t);
46 return t.tv_nsec + ((uint64_t)t.tv_sec * 1000 * 1000 * 1000);
47 }
48
getTimeMillis() const49 uint64_t Bench::getTimeMillis() const
50 {
51 return getTimeNanos() / 1000000;
52 }
53
54
testWork(void * usr,uint32_t idx)55 void Bench::testWork(void *usr, uint32_t idx)
56 {
57 Bench *b = (Bench *)usr;
58 //__android_log_print(ANDROID_LOG_INFO, "bench", "test %i %p", idx, b);
59
60 float f1 = 0.f;
61 float f2 = 0.f;
62 float f3 = 0.f;
63 float f4 = 0.f;
64
65 float *ipk = b->mIpKernel[idx];
66 volatile float *src = b->mSrcBuf[idx];
67 volatile float *out = b->mOutBuf[idx];
68
69 //__android_log_print(ANDROID_LOG_INFO, "bench", "test %p %p %p", ipk, src, out);
70
71 do {
72
73 for (int i = 0; i < 1024; i++) {
74 f1 += src[i * 4] * ipk[i];
75 f2 += src[i * 4 + 1] * ipk[i];
76 f3 += src[i * 4 + 2] * ipk[i];
77 f4 += sqrtf(f1 + f2 + f3);
78 }
79 out[0] = f1;
80 out[1] = f2;
81 out[2] = f3;
82 out[3] = f4;
83
84 } while (b->incTimeBucket());
85 }
86
initIP()87 bool Bench::initIP() {
88 int workers = mWorkers.getWorkerCount();
89
90 mIpKernel = new float *[workers];
91 mSrcBuf = new float *[workers];
92 mOutBuf = new float *[workers];
93
94 for (int i = 0; i < workers; i++) {
95 mIpKernel[i] = new float[1024];
96 mSrcBuf[i] = new float[4096];
97 mOutBuf[i] = new float[4];
98 }
99
100 return true;
101 }
102
runPowerManagementTest(uint64_t options)103 bool Bench::runPowerManagementTest(uint64_t options) {
104 //__android_log_print(ANDROID_LOG_INFO, "bench", "rpmt x %i", options);
105
106 mTimeBucketDivisor = 1000 * 1000; // use ms
107 allocateBuckets(2 * 1000);
108
109 usleep(2 * 1000 * 1000);
110
111 //__android_log_print(ANDROID_LOG_INFO, "bench", "rpmt 2 b %i", mTimeBuckets);
112
113 mTimeStartNanos = getTimeNanos();
114 mTimeEndNanos = mTimeStartNanos + mTimeBuckets * mTimeBucketDivisor;
115 memset(mTimeBucket, 0, sizeof(uint32_t) * mTimeBuckets);
116
117 bool useMT = false;
118
119 //__android_log_print(ANDROID_LOG_INFO, "bench", "rpmt 2.1 b %i", mTimeBuckets);
120 mTimeEndGroupNanos = mTimeStartNanos;
121 do {
122 // Advance 8ms
123 mTimeEndGroupNanos += 8 * 1000 * 1000;
124
125 int threads = useMT ? 1 : 0;
126 useMT = !useMT;
127 if ((options & 0x1f) != 0) {
128 threads = options & 0x1f;
129 }
130
131 //__android_log_print(ANDROID_LOG_INFO, "bench", "threads %i", threads);
132
133 mWorkers.launchWork(testWork, this, threads);
134 } while (mTimeEndGroupNanos <= mTimeEndNanos);
135
136 return true;
137 }
138
allocateBuckets(size_t bucketCount)139 bool Bench::allocateBuckets(size_t bucketCount) {
140 if (bucketCount == mTimeBuckets) {
141 return true;
142 }
143
144 if (mTimeBucket != NULL) {
145 delete[] mTimeBucket;
146 mTimeBucket = NULL;
147 }
148
149 mTimeBuckets = bucketCount;
150 if (mTimeBuckets > 0) {
151 mTimeBucket = new uint32_t[mTimeBuckets];
152 }
153
154 return true;
155 }
156
init()157 bool Bench::init() {
158 mWorkers.init();
159
160 initIP();
161 //ALOGV("%p Launching thread(s), CPUs %i", mRSC, mWorkers.mCount + 1);
162
163 return true;
164 }
165
incTimeBucket() const166 bool Bench::incTimeBucket() const {
167 uint64_t time = getTimeNanos();
168 uint64_t bucket = (time - mTimeStartNanos) / mTimeBucketDivisor;
169
170 if (bucket >= mTimeBuckets) {
171 return false;
172 }
173
174 __sync_fetch_and_add(&mTimeBucket[bucket], 1);
175
176 return time < mTimeEndGroupNanos;
177 }
178
getData(float * data,size_t count) const179 void Bench::getData(float *data, size_t count) const {
180 if (count > mTimeBuckets) {
181 count = mTimeBuckets;
182 }
183 for (size_t ct = 0; ct < count; ct++) {
184 data[ct] = (float)mTimeBucket[ct];
185 }
186 }
187
runCPUHeatSoak(uint64_t)188 bool Bench::runCPUHeatSoak(uint64_t /* options */)
189 {
190 mTimeBucketDivisor = 1000 * 1000; // use ms
191 allocateBuckets(1000);
192
193 mTimeStartNanos = getTimeNanos();
194 mTimeEndNanos = mTimeStartNanos + mTimeBuckets * mTimeBucketDivisor;
195 memset(mTimeBucket, 0, sizeof(uint32_t) * mTimeBuckets);
196
197 mTimeEndGroupNanos = mTimeEndNanos;
198 mWorkers.launchWork(testWork, this, 0);
199 return true;
200 }
201
runMemoryBandwidthTest(uint64_t size)202 float Bench::runMemoryBandwidthTest(uint64_t size)
203 {
204 uint64_t t1 = getTimeMillis();
205 for (size_t ct = mMemLoopCount; ct > 0; ct--) {
206 memcpy(mMemDst, mMemSrc, size);
207 }
208 double dt = getTimeMillis() - t1;
209 dt /= 1000;
210
211 double bw = ((double)size) * mMemLoopCount / dt;
212 bw /= 1024 * 1024 * 1024;
213
214 float targetTime = 0.2f;
215 if (dt > targetTime) {
216 mMemLoopCount = (size_t)((double)mMemLoopCount / (dt / targetTime));
217 }
218
219 return (float)bw;
220 }
221
runMemoryLatencyTest(uint64_t size)222 float Bench::runMemoryLatencyTest(uint64_t size)
223 {
224 //__android_log_print(ANDROID_LOG_INFO, "bench", "latency %i", (int)size);
225 void ** sp = (void **)mMemSrc;
226 size_t maxIndex = size / sizeof(void *);
227 size_t loops = ((maxIndex / 2) & (~3));
228 //loops = 10;
229
230 if (size != mMemLatencyLastSize) {
231 __android_log_print(ANDROID_LOG_INFO, "bench", "latency build %i %i", (int)maxIndex, loops);
232 mMemLatencyLastSize = size;
233 memset((void *)mMemSrc, 0, mMemLatencyLastSize);
234
235 size_t lastIdx = 0;
236 for (size_t ct = 0; ct < loops; ct++) {
237 size_t ni = rand() * rand();
238 ni = ni % maxIndex;
239 while ((sp[ni] != NULL) || (ni == lastIdx)) {
240 ni++;
241 if (ni >= maxIndex) {
242 ni = 1;
243 }
244 // __android_log_print(ANDROID_LOG_INFO, "bench", "gen ni loop %i %i", lastIdx, ni);
245 }
246 // __android_log_print(ANDROID_LOG_INFO, "bench", "gen ct = %i %i %i %p %p", (int)ct, lastIdx, ni, &sp[lastIdx], &sp[ni]);
247 sp[lastIdx] = &sp[ni];
248 lastIdx = ni;
249 }
250 sp[lastIdx] = 0;
251 }
252 //__android_log_print(ANDROID_LOG_INFO, "bench", "latency testing");
253
254 uint64_t t1 = getTimeNanos();
255 for (size_t ct = mMemLoopCount; ct > 0; ct--) {
256 size_t lc = 1;
257 volatile void *p = sp[0];
258 while (p != NULL) {
259 // Unroll once to minimize branching overhead.
260 void **pn = (void **)p;
261 p = pn[0];
262 pn = (void **)p;
263 p = pn[0];
264 }
265 }
266 //__android_log_print(ANDROID_LOG_INFO, "bench", "v %i %i", loops * mMemLoopCount, v);
267
268 double dt = getTimeNanos() - t1;
269 double dts = dt / 1000000000;
270 double lat = dt / (loops * mMemLoopCount);
271 __android_log_print(ANDROID_LOG_INFO, "bench", "latency ret %f", lat);
272
273 float targetTime = 0.2f;
274 if (dts > targetTime) {
275 mMemLoopCount = (size_t)((double)mMemLoopCount / (dts / targetTime));
276 if (mMemLoopCount < 1) {
277 mMemLoopCount = 1;
278 }
279 }
280
281 return (float)lat;
282 }
283
startMemTests()284 bool Bench::startMemTests()
285 {
286 mMemSrc = (uint8_t *)malloc(1024*1024*64);
287 mMemDst = (uint8_t *)malloc(1024*1024*64);
288
289 memset(mMemSrc, 0, 1024*1024*16);
290 memset(mMemDst, 0, 1024*1024*16);
291
292 mMemLoopCount = 1;
293 uint64_t start = getTimeMillis();
294 while((getTimeMillis() - start) < 500) {
295 memcpy(mMemDst, mMemSrc, 1024);
296 mMemLoopCount++;
297 }
298 mMemLatencyLastSize = 0;
299 return true;
300 }
301
endMemTests()302 void Bench::endMemTests()
303 {
304 free(mMemSrc);
305 free(mMemDst);
306 mMemSrc = NULL;
307 mMemDst = NULL;
308 mMemLatencyLastSize = 0;
309 }
310
GflopKernelC()311 void Bench::GflopKernelC() {
312 int halfKX = (mGFlop.kernelXSize / 2);
313 for (int x = halfKX; x < (mGFlop.imageXSize - halfKX - 1); x++) {
314 const float * krnPtr = mGFlop.kernelBuffer;
315 float sum = 0.f;
316
317 int srcInc = mGFlop.imageXSize - mGFlop.kernelXSize;
318 const float * srcPtr = &mGFlop.srcBuffer[x - halfKX];
319
320 for (int ix = 0; ix < mGFlop.kernelXSize; ix++) {
321 sum += srcPtr[0] * krnPtr[0];
322 krnPtr++;
323 srcPtr++;
324 }
325
326 float * dstPtr = &mGFlop.dstBuffer[x];
327 dstPtr[0] = sum;
328
329 }
330
331 }
332
GflopKernelC_y3()333 void Bench::GflopKernelC_y3() {
334 }
335
runGFlopsTest(uint64_t)336 float Bench::runGFlopsTest(uint64_t /* options */)
337 {
338 mTimeBucketDivisor = 1000 * 1000; // use ms
339 allocateBuckets(1000);
340
341 mTimeStartNanos = getTimeNanos();
342 mTimeEndNanos = mTimeStartNanos + mTimeBuckets * mTimeBucketDivisor;
343 memset(mTimeBucket, 0, sizeof(uint32_t) * mTimeBuckets);
344
345 mTimeEndGroupNanos = mTimeEndNanos;
346 mWorkers.launchWork(testWork, this, 0);
347
348 // Simulate image convolve
349 mGFlop.kernelXSize = 27;
350 mGFlop.imageXSize = 1024 * 1024;
351
352 mGFlop.srcBuffer = (float *)malloc(mGFlop.imageXSize * sizeof(float));
353 mGFlop.dstBuffer = (float *)malloc(mGFlop.imageXSize * sizeof(float));
354 mGFlop.kernelBuffer = (float *)malloc(mGFlop.kernelXSize * sizeof(float));
355
356 double ops = mGFlop.kernelXSize;
357 ops = ops * 2.f - 1.f;
358 ops *= mGFlop.imageXSize;
359
360 uint64_t t1 = getTimeNanos();
361 GflopKernelC();
362 double dt = getTimeNanos() - t1;
363
364 dt /= 1000.f * 1000.f * 1000.f;
365
366 double gflops = ops / dt / 1000000000.f;
367
368 __android_log_print(ANDROID_LOG_INFO, "bench", "v %f %f %f", dt, ops, gflops);
369
370 return (float)gflops;
371 }
372
373
374