/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "run_tflite.h"

#include <android/log.h>
#include <dirent.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <ftw.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>

#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <memory>
#include <string>

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"

#define LOG_TAG "NN_BENCHMARK"

#define FATAL(fmt, ...)                                                  \
  do {                                                                   \
    __android_log_print(ANDROID_LOG_FATAL, LOG_TAG, fmt, ##__VA_ARGS__); \
    assert(false);                                                       \
  } while (0)

namespace {

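// Returns the current wall-clock time in microseconds.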
long long currentTimeInUsec() {
  timeval tv;
  gettimeofday(&tv, nullptr);
  // Widen before multiplying so the computation cannot overflow on targets
  // where time_t is 32 bits.
  return (static_cast<long long>(tv.tv_sec) * 1000000LL) + tv.tv_usec;
}

// Workaround for build systems that make it difficult to pick the correct NDK
// API level: the NDK tracing methods are dynamically loaded from
// libandroid.so.
typedef void* (*fp_ATrace_beginSection)(const char* sectionName);
typedef void* (*fp_ATrace_endSection)();
struct TraceFunc {
  fp_ATrace_beginSection ATrace_beginSection;
  fp_ATrace_endSection ATrace_endSection;
};
TraceFunc setupTraceFunc() {
  void* lib = dlopen("libandroid.so", RTLD_NOW | RTLD_LOCAL);
  if (lib == nullptr) {
    FATAL("unable to open libandroid.so");
  }
  return {
      reinterpret_cast<fp_ATrace_beginSection>(
          dlsym(lib, "ATrace_beginSection")),
      reinterpret_cast<fp_ATrace_endSection>(dlsym(lib, "ATrace_endSection"))};
}
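// Resolved once during static initialization; the function pointers stay
// valid for the lifetime of the process.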
static TraceFunc kTraceFunc{setupTraceFunc()};

}  // namespace

BenchmarkModel* BenchmarkModel::create(const char* modelfile, bool use_nnapi,
                                       bool enable_intermediate_tensors_dump, int* nnapiErrno,
                                       const char* nnapi_device_name, bool mmapModel,
                                       const char* nnapi_cache_dir) {
  BenchmarkModel* model = new BenchmarkModel();
  if (!model->init(modelfile, use_nnapi, enable_intermediate_tensors_dump, nnapiErrno,
                   nnapi_device_name, mmapModel, nnapi_cache_dir)) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to init model %s", modelfile);
    delete model;
    return nullptr;
  }
  return model;
}

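// Hypothetical usage sketch (the path and flag values are illustrative only,
// not part of this file):
//   int nnapiErrno = 0;
//   std::unique_ptr<BenchmarkModel> model(BenchmarkModel::create(
//       "/data/local/tmp/model.tflite", /*use_nnapi=*/true,
//       /*enable_intermediate_tensors_dump=*/false, &nnapiErrno,
//       /*nnapi_device_name=*/nullptr, /*mmapModel=*/true,
//       /*nnapi_cache_dir=*/nullptr));
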
bool BenchmarkModel::init(const char* modelfile, bool use_nnapi,
                          bool enable_intermediate_tensors_dump, int* nnapiErrno,
                          const char* nnapi_device_name, bool mmapModel,
                          const char* nnapi_cache_dir) {
  mModelFile = modelfile;
  mUseNnApi = use_nnapi;
  if (nnapi_cache_dir) {
    mCacheDir = nnapi_cache_dir;
  }
  if (nnapi_device_name) {
    mNnApiDeviceName = nnapi_device_name;
  }

  if (mmapModel) {
    // Memory-map the model. NOTE: the mapping must live at least as long as
    // the interpreter built on top of it.
    mTfliteModel = tflite::FlatBufferModel::BuildFromFile(modelfile);
  } else {
    std::ifstream t(modelfile);
    mModelBuffer = std::string((std::istreambuf_iterator<char>(t)),
                               std::istreambuf_iterator<char>());
    mTfliteModel = tflite::FlatBufferModel::BuildFromBuffer(mModelBuffer.c_str(),
                                                            mModelBuffer.size());
  }
  if (!mTfliteModel) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to load model %s",
                        modelfile);
    return false;
  }

  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(*mTfliteModel, resolver)(&mTfliteInterpreter);
  if (!mTfliteInterpreter) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to create TFlite interpreter");
    return false;
  }

  if (enable_intermediate_tensors_dump) {
    // Make the output of every op a model output. This way we will be able to
    // fetch each intermediate tensor when running with delegates.
    outputs.clear();
    for (size_t node = 0; node < mTfliteInterpreter->nodes_size(); ++node) {
      auto node_outputs =
          mTfliteInterpreter->node_and_registration(node)->first.outputs;
      outputs.insert(outputs.end(), node_outputs->data,
                     node_outputs->data + node_outputs->size);
    }
    mTfliteInterpreter->SetOutputs(outputs);
  }

  // Allow FP16 precision for all models.
  mTfliteInterpreter->SetAllowFp16PrecisionForFp32(true);
  if (use_nnapi) {
    tflite::StatefulNnApiDelegate::Options nnapi_options;
    nnapi_options.accelerator_name = nnapi_device_name;
    mTfliteNnapiDelegate = std::make_unique<tflite::StatefulNnApiDelegate>(nnapi_options);
    int delegationStatus = mTfliteInterpreter->ModifyGraphWithDelegate(mTfliteNnapiDelegate.get());
    *nnapiErrno = mTfliteNnapiDelegate->GetNnApiErrno();
    if (delegationStatus != kTfLiteOk ||
        *nnapiErrno != ANEURALNETWORKS_NO_ERROR) {
      __android_log_print(
          ANDROID_LOG_ERROR, LOG_TAG,
          "Failed to initialize NNAPI Delegate for model %s, nnapi_errno is %d",
          modelfile, *nnapiErrno);
      return false;
    }
  }
  return true;
}

BenchmarkModel::BenchmarkModel() {}
BenchmarkModel::~BenchmarkModel() {}

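// Copies |length| bytes of input data into the model's single input tensor.
// Only float32 and uint8 input tensors are supported.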
bool BenchmarkModel::setInput(const uint8_t* dataPtr, size_t length) {
  int input = mTfliteInterpreter->inputs()[0];
  auto* input_tensor = mTfliteInterpreter->tensor(input);

  switch (input_tensor->type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8: {
      // Guard against writing past the end of the tensor buffer.
      if (length != input_tensor->bytes) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "Input size mismatch: expected %zu bytes, got %zu",
                            input_tensor->bytes, length);
        return false;
      }
      void* raw = input_tensor->data.raw;
      memcpy(raw, dataPtr, length);
      break;
    }
    default:
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "Input tensor type not supported");
      return false;
  }
  return true;
}
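
// Appends the raw bytes of the given output tensor to the corresponding
// output buffer in |result|.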
void BenchmarkModel::saveInferenceOutput(InferenceResult* result,
                                         int output_index) {
  int output = mTfliteInterpreter->outputs()[output_index];
  auto* output_tensor = mTfliteInterpreter->tensor(output);
  auto& sink = result->inferenceOutputs[output_index];
  sink.insert(sink.end(), output_tensor->data.uint8,
              output_tensor->data.uint8 + output_tensor->bytes);
}

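// Compares the given output tensor against golden data, storing the
// mean-square error and the maximum single-element error into |result|.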
void BenchmarkModel::getOutputError(const uint8_t* expected_data, size_t length,
                                    InferenceResult* result, int output_index) {
  int output = mTfliteInterpreter->outputs()[output_index];
  auto* output_tensor = mTfliteInterpreter->tensor(output);
  if (output_tensor->bytes != length) {
    FATAL("Wrong size of output tensor, expected %zu, is %zu",
          length, output_tensor->bytes);
  }

  size_t elements_count = 0;
  float err_sum = 0.0;
  float max_error = 0.0;
  switch (output_tensor->type) {
    case kTfLiteUInt8: {
      uint8_t* output_raw = mTfliteInterpreter->typed_tensor<uint8_t>(output);
      elements_count = output_tensor->bytes;
      for (size_t i = 0; i < output_tensor->bytes; ++i) {
        float err = ((float)output_raw[i]) - ((float)expected_data[i]);
        if (err > max_error) max_error = err;
        err_sum += err * err;
      }
      break;
    }
    case kTfLiteFloat32: {
      const float* expected = reinterpret_cast<const float*>(expected_data);
      float* output_raw = mTfliteInterpreter->typed_tensor<float>(output);
      elements_count = output_tensor->bytes / sizeof(float);
      for (size_t i = 0; i < elements_count; ++i) {
        float err = output_raw[i] - expected[i];
        if (err > max_error) max_error = err;
        err_sum += err * err;
      }
      break;
    }
    default:
      FATAL("Output tensor type %d not supported", output_tensor->type);
  }
  result->meanSquareErrors[output_index] = err_sum / elements_count;
  result->maxSingleErrors[output_index] = max_error;
}

bool BenchmarkModel::resizeInputTensors(std::vector<int> shape) {
  // The benchmark only expects a single input tensor, hardcoded as 0.
  int input = mTfliteInterpreter->inputs()[0];
  if (mTfliteInterpreter->ResizeInputTensor(input, shape) != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to resize input tensor!");
    return false;
  }
  if (mTfliteInterpreter->AllocateTensors() != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to allocate tensors!");
    return false;
  }
  return true;
}

bool BenchmarkModel::runInference() {
  auto status = mTfliteInterpreter->Invoke();
  auto nnapi_errno = mTfliteNnapiDelegate
                         ? mTfliteNnapiDelegate->GetNnApiErrno()
                         : ANEURALNETWORKS_NO_ERROR;
  if (status != kTfLiteOk || nnapi_errno != ANEURALNETWORKS_NO_ERROR) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to invoke, tflite status: %d, nnapi errno: %d!",
                        (int)status, nnapi_errno);
    return false;
  }
  return true;
}

bool BenchmarkModel::resetStates() {
  auto status = mTfliteInterpreter->ResetVariableTensors();
  if (status != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to reset variable tensors: %d!", (int)status);
    return false;
  }
  return true;
}

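// Runs up to |seqInferencesMaxCount| input/output sequences, cycling through
// |inOutData|. Records the latency of every inference and, unless
// FLAG_IGNORE_GOLDEN_OUTPUT is set, the error against the golden outputs.
// Stops early once the accumulated inference time exceeds |timeout| seconds.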
bool BenchmarkModel::benchmark(
    const std::vector<InferenceInOutSequence>& inOutData,
    int seqInferencesMaxCount, float timeout, int flags,
    std::vector<InferenceResult>* results) {
  if (inOutData.empty()) {
    __android_log_print(ANDROID_LOG_WARN, LOG_TAG,
                        "Input/output vector is empty");
    return true;
  }

  float inferenceTotal = 0.0;
  for (int seqInferenceIndex = 0; seqInferenceIndex < seqInferencesMaxCount;
       ++seqInferenceIndex) {
    resetStates();

    const int inputOutputSequenceIndex = seqInferenceIndex % inOutData.size();
    const InferenceInOutSequence& seq = inOutData[inputOutputSequenceIndex];
    for (int i = 0; i < seq.size(); ++i) {
      const InferenceInOut& data = seq[i];

      // For NNAPI systrace usage documentation, see
      // frameworks/ml/nn/common/include/Tracing.h.
      kTraceFunc.ATrace_beginSection("[NN_LA_PE]BenchmarkModel::benchmark");
      kTraceFunc.ATrace_beginSection("[NN_LA_PIO]BenchmarkModel::input");
      if (data.input) {
        setInput(data.input, data.input_size);
      } else {
        int input = mTfliteInterpreter->inputs()[0];
        auto* input_tensor = mTfliteInterpreter->tensor(input);
        if (!data.createInput((uint8_t*)input_tensor->data.raw,
                              input_tensor->bytes)) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "Input creation %d failed", i);
          return false;
        }
      }
      kTraceFunc.ATrace_endSection();
      long long startTime = currentTimeInUsec();
      const bool success = runInference();
      kTraceFunc.ATrace_endSection();
      long long endTime = currentTimeInUsec();
      if (!success) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Inference %d failed",
                            i);
        return false;
      }

      float inferenceTime =
          static_cast<float>(endTime - startTime) / 1000000.0f;
      size_t outputsCount = mTfliteInterpreter->outputs().size();
      InferenceResult result{
          inferenceTime, {}, {}, {}, inputOutputSequenceIndex, i};
      result.meanSquareErrors.resize(outputsCount);
      result.maxSingleErrors.resize(outputsCount);
      result.inferenceOutputs.resize(outputsCount);

      if ((flags & FLAG_IGNORE_GOLDEN_OUTPUT) == 0) {
        if (outputsCount != data.outputs.size()) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "Golden/actual outputs (%zu/%zu) count mismatch",
                              data.outputs.size(), outputsCount);
          return false;
        }
        for (int j = 0; j < outputsCount; ++j) {
          getOutputError(data.outputs[j].ptr, data.outputs[j].size, &result, j);
        }
      }

      if ((flags & FLAG_DISCARD_INFERENCE_OUTPUT) == 0) {
        for (int j = 0; j < outputsCount; ++j) {
          saveInferenceOutput(&result, j);
        }
      }
      results->push_back(result);
      inferenceTotal += inferenceTime;
    }

    // Timeout?
    if (timeout > 0.001 && inferenceTotal > timeout) {
      return true;
    }
  }
  return true;
}

// If cacheDir is not nullptr, compilation caching will be used with NNAPI.
bool BenchmarkModel::runCompilation(const char* cacheDir) {
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(*mTfliteModel, resolver)(&interpreter);
  if (!interpreter) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to create TFlite interpreter");
    return false;
  }

  // Allow FP16 precision for all models.
  interpreter->SetAllowFp16PrecisionForFp32(true);

  if (mUseNnApi) {
    tflite::StatefulNnApiDelegate::Options nnapi_options;
    nnapi_options.accelerator_name = mNnApiDeviceName.empty() ? nullptr : mNnApiDeviceName.c_str();
    if (cacheDir) {
      nnapi_options.cache_dir = cacheDir;
      nnapi_options.model_token = mModelFile.c_str();
    }
    tflite::StatefulNnApiDelegate delegate(nnapi_options);
    int delegationStatus = interpreter->ModifyGraphWithDelegate(&delegate);
    auto nnapiErrno = delegate.GetNnApiErrno();
    if (delegationStatus != kTfLiteOk || nnapiErrno != ANEURALNETWORKS_NO_ERROR) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "Failed to initialize NNAPI Delegate for model %s, nnapi_errno is %d",
                          mModelFile.c_str(), nnapiErrno);
      return false;
    }
  }
  return true;
}

// A helper class to manage the lifetime of a temporary cache directory.
class ScopedTempDirectory {
 public:
  ScopedTempDirectory(std::string base) : mBase(std::move(base)) {}
  ~ScopedTempDirectory() { cleanup(); }

  // Create a new temp directory, removing the old one if needed.
  void recreate() {
    cleanup();
    mTempDir = mBase + "/XXXXXX";
    if (mkdtemp(&mTempDir[0]) == nullptr) {
      // Leave mTempDir empty on failure so that get() returns nullptr.
      mTempDir.clear();
    }
  }

  // Get the path to the temp directory, or nullptr if none has been created.
  const char* get() const { return mTempDir.empty() ? nullptr : mTempDir.c_str(); }

 private:
  // Recursively removes the temp directory and its contents. FTW_DEPTH makes
  // nftw() visit entries depth-first, so files are removed before their
  // parent directories.
  void cleanup() {
    if (mTempDir.empty()) {
      return;
    }
    auto callback = [](const char* entry, const struct stat*, int, struct FTW*) {
      return remove(entry);
    };
    nftw(mTempDir.c_str(), callback, 128, FTW_DEPTH | FTW_MOUNT | FTW_PHYS);
    mTempDir.clear();
  }

  std::string mBase;
  std::string mTempDir;
};

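// Measures the on-disk size of the NNAPI compilation cache: compiles once
// into a fresh temporary directory and sums the sizes of the regular files
// generated there.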
bool BenchmarkModel::getCompilationCacheSize(int* cacheSizeBytes) {
  if (cacheSizeBytes == nullptr) return false;

  // Create cache files.
  ScopedTempDirectory tempDir(mCacheDir.value());
  tempDir.recreate();
  const bool success = runCompilation(tempDir.get());
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Save to cache failed");
    return false;
  }

  // Compute the total size of the cache files.
  int totalSize = 0;
  DIR* dir = opendir(tempDir.get());
  if (dir == nullptr) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to open cache directory");
    return false;
  }
  struct dirent* dp = nullptr;
  while ((dp = readdir(dir)) != nullptr) {
    char fullPath[1024];
    snprintf(fullPath, sizeof(fullPath), "%s/%s", tempDir.get(), dp->d_name);
    struct stat st;
    int err = stat(fullPath, &st);
    if (err != 0) {
      closedir(dir);
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to stat %s", fullPath);
      return false;
    }
    // Only accumulate sizes of regular files. This excludes '.' and '..'.
    if (S_ISREG(st.st_mode)) {
      totalSize += st.st_size;
    }
  }
  closedir(dir);
  *cacheSizeBytes = totalSize;
  return true;
}

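// Benchmarks one compilation mode: WITHOUT_CACHE compiles with caching
// disabled, SAVE_TO_CACHE compiles into an empty cache directory (cache
// miss), and PREPARE_FROM_CACHE compiles from a pre-populated cache (cache
// hit).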
bool BenchmarkModel::benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type,
                                                      int maxNumIterations, float timeout,
                                                      std::vector<float>* results) {
  if (results != nullptr) {
    results->clear();
  }
  ScopedTempDirectory tempDir(mCacheDir.value());

  // Initialize cache files to benchmark a cache hit.
  if (type == CompilationBenchmarkType::PREPARE_FROM_CACHE) {
    tempDir.recreate();
    const bool success = runCompilation(tempDir.get());
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Save to cache failed");
      return false;
    }
  }

  float compilationTotal = 0.0;
  for (int i = 0; i < maxNumIterations; i++) {
    const char* cacheDir = nullptr;
    switch (type) {
      case CompilationBenchmarkType::WITHOUT_CACHE:
        cacheDir = nullptr;
        break;
      case CompilationBenchmarkType::SAVE_TO_CACHE:
        // Remove the cache files from the last iteration to benchmark a cache miss.
        tempDir.recreate();
        [[fallthrough]];
      case CompilationBenchmarkType::PREPARE_FROM_CACHE:
        cacheDir = tempDir.get();
        break;
      default:
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Unknown CompilationBenchmarkType: %d",
                            static_cast<int>(type));
        return false;
    }

    kTraceFunc.ATrace_beginSection("[NN_LA_PC]BenchmarkModel::benchmarkCompilation");
    const long long startTime = currentTimeInUsec();
    const bool success = runCompilation(cacheDir);
    const long long endTime = currentTimeInUsec();
    kTraceFunc.ATrace_endSection();
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Compilation %d failed", i);
      return false;
    }

    const float compilationTime = static_cast<float>(endTime - startTime) / 1000000.0f;
    if (results != nullptr) {
      results->push_back(compilationTime);
    }

    // Timeout?
    compilationTotal += compilationTime;
    if (timeout > 0.001 && compilationTotal > timeout) {
      return true;
    }
  }
  return true;
}

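// Runs a warmup pass whose timings are discarded, then the measured pass
// whose per-iteration compilation times are returned in |results|.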
bool BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                                int maxNumIterations,
                                                                float warmupTimeout,
                                                                float runTimeout,
                                                                std::vector<float>* results) {
  kTraceFunc.ATrace_beginSection(
      "[NN_LA_PWM]BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup");
  bool success = benchmarkSingleTypeOfCompilation(type, maxNumIterations, warmupTimeout, nullptr);
  kTraceFunc.ATrace_endSection();
  if (!success) return false;

  kTraceFunc.ATrace_beginSection(
      "[NN_LA_PBM]BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup");
  success = benchmarkSingleTypeOfCompilation(type, maxNumIterations, runTimeout, results);
  kTraceFunc.ATrace_endSection();
  return success;
}

bool BenchmarkModel::benchmarkCompilation(int maxNumIterations, float warmupTimeout,
                                          float runTimeout, CompilationBenchmarkResult* result) {
  if (result == nullptr) return false;

  // Benchmark compilation without cache.
  bool success = benchmarkSingleTypeOfCompilationWithWarmup(
      CompilationBenchmarkType::WITHOUT_CACHE, maxNumIterations, warmupTimeout, runTimeout,
      &result->compileWithoutCacheTimeSec);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to benchmark compilation without cache");
    return false;
  }

  // Get the compilation cache size.
  success = getCompilationCacheSize(&result->cacheSizeBytes);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to retrieve compilation cache size");
    return false;
  }

  // Benchmark saving to cache and preparing from cache only if supported.
  if (result->cacheSizeBytes > 0) {
    // Benchmark saving to cache.
    auto& saveToCacheTimeSec = result->saveToCacheTimeSec.emplace();
    success = benchmarkSingleTypeOfCompilationWithWarmup(
        CompilationBenchmarkType::SAVE_TO_CACHE, maxNumIterations, warmupTimeout, runTimeout,
        &saveToCacheTimeSec);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to benchmark saving to cache");
      return false;
    }

    // Benchmark preparing from cache.
    auto& prepareFromCacheTimeSec = result->prepareFromCacheTimeSec.emplace();
    success = benchmarkSingleTypeOfCompilationWithWarmup(
        CompilationBenchmarkType::PREPARE_FROM_CACHE, maxNumIterations, warmupTimeout,
        runTimeout, &prepareFromCacheTimeSec);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to benchmark preparing from cache");
      return false;
    }
  }
  return true;
}

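// Runs every sequence in |inOutData| once and writes each output tensor to a
// file under |path|. Assumes the model was initialized with
// enable_intermediate_tensors_dump, so that |outputs| lists every node's
// output tensors.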
bool BenchmarkModel::dumpAllLayers(
    const char* path, const std::vector<InferenceInOutSequence>& inOutData) {
  if (inOutData.empty()) {
    FATAL("Input/output vector is empty");
  }

  for (int seqInferenceIndex = 0; seqInferenceIndex < inOutData.size();
       ++seqInferenceIndex) {
    resetStates();

    const InferenceInOutSequence& seq = inOutData[seqInferenceIndex];
    for (int i = 0; i < seq.size(); ++i) {
      const InferenceInOut& data = seq[i];
      setInput(data.input, data.input_size);
      const bool success = runInference();
      if (!success) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Inference %d failed",
                            i);
        return false;
      }

      // The tensors are visited in execution order, which is not necessarily
      // sorted by tensor index.
      for (int tensor_order = 0; tensor_order < outputs.size(); ++tensor_order) {
        int tensor_index = outputs[tensor_order];
        auto* output_tensor = mTfliteInterpreter->tensor(tensor_index);
        if (output_tensor->data.raw == nullptr) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "output_tensor->data.raw == nullptr at index %d ", tensor_index);
          continue;
        }
        char fullpath[1024];
        snprintf(fullpath, sizeof(fullpath), "%s/dump_%.3d_seq_%.3d_order_%.3d_tensor_%.3d", path,
                 seqInferenceIndex, i, tensor_order, tensor_index);
        FILE* f = fopen(fullpath, "wb");
        if (f == nullptr) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to open %s for writing",
                              fullpath);
          return false;
        }
        fwrite(output_tensor->data.raw, output_tensor->bytes, 1, f);
        fclose(f);
      }
    }
  }
  return true;
}