/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "run_tflite.h"

#include <android/log.h>
#include <dirent.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <ftw.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>

#include <cassert>
#include <cstdio>
#include <cstring>
#include <fstream>

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#define LOG_TAG "NN_BENCHMARK"

#define FATAL(fmt, ...)                                                  \
  do {                                                                   \
    __android_log_print(ANDROID_LOG_FATAL, LOG_TAG, fmt, ##__VA_ARGS__); \
    assert(false);                                                       \
  } while (0)

namespace {

long long currentTimeInUsec() {
  timeval tv;
  gettimeofday(&tv, NULL);
  return ((tv.tv_sec * 1000000LL) + tv.tv_usec);
}

// Workaround for build systems that make it difficult to pick the correct NDK
// API level. NDK tracing methods are dynamically loaded from libandroid.so.
typedef void* (*fp_ATrace_beginSection)(const char* sectionName);
typedef void* (*fp_ATrace_endSection)();
struct TraceFunc {
  fp_ATrace_beginSection ATrace_beginSection;
  fp_ATrace_endSection ATrace_endSection;
};
TraceFunc setupTraceFunc() {
  void* lib = dlopen("libandroid.so", RTLD_NOW | RTLD_LOCAL);
  if (lib == nullptr) {
    FATAL("unable to open libandroid.so");
  }
  return {
      reinterpret_cast<fp_ATrace_beginSection>(
          dlsym(lib, "ATrace_beginSection")),
      reinterpret_cast<fp_ATrace_endSection>(dlsym(lib, "ATrace_endSection"))};
}
static TraceFunc kTraceFunc{setupTraceFunc()};

}  // namespace

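// Factory helper: constructs a BenchmarkModel and runs init() on it. On any
// failure the partially constructed object is deleted and nullptr is returned,
// so callers only need to null-check the result.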
BenchmarkModel* BenchmarkModel::create(const char* modelfile, bool use_nnapi,
                                       bool enable_intermediate_tensors_dump, int* nnapiErrno,
                                       const char* nnapi_device_name, bool mmapModel,
                                       const char* nnapi_cache_dir) {
  BenchmarkModel* model = new BenchmarkModel();
  if (!model->init(modelfile, use_nnapi, enable_intermediate_tensors_dump, nnapiErrno,
                   nnapi_device_name, mmapModel, nnapi_cache_dir)) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to init model %s", modelfile);
    delete model;
    return nullptr;
  }
  return model;
}

bool BenchmarkModel::init(const char* modelfile, bool use_nnapi,
                          bool enable_intermediate_tensors_dump, int* nnapiErrno,
                          const char* nnapi_device_name, bool mmapModel,
                          const char* nnapi_cache_dir) {
  mModelFile = modelfile;
  mUseNnApi = use_nnapi;
  if (nnapi_cache_dir) {
    mCacheDir = nnapi_cache_dir;
  }
  if (nnapi_device_name) {
    mNnApiDeviceName = nnapi_device_name;
  }

  if (mmapModel) {
    // Memory-map the model. NOTE: the mapping must live at least as long as
    // the interpreter context.
    mTfliteModel = tflite::FlatBufferModel::BuildFromFile(modelfile);
  } else {
    std::ifstream t(modelfile);
    mModelBuffer = std::string((std::istreambuf_iterator<char>(t)),
                               std::istreambuf_iterator<char>());
    mTfliteModel = tflite::FlatBufferModel::BuildFromBuffer(mModelBuffer.c_str(),
                                                            mModelBuffer.size());
  }
  if (!mTfliteModel) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to load model %s",
                        modelfile);
    return false;
  }

  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(*mTfliteModel, resolver)(&mTfliteInterpreter);
  if (!mTfliteInterpreter) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to create TFlite interpreter");
    return false;
  }

  if (enable_intermediate_tensors_dump) {
    // Make the output of every op a model output. This way we will be able to
    // fetch each intermediate tensor when running with delegates.
    outputs.clear();
    for (size_t node = 0; node < mTfliteInterpreter->nodes_size(); ++node) {
      auto node_outputs =
          mTfliteInterpreter->node_and_registration(node)->first.outputs;
      outputs.insert(outputs.end(), node_outputs->data,
                     node_outputs->data + node_outputs->size);
    }
    mTfliteInterpreter->SetOutputs(outputs);
  }

  // Allow Fp16 precision for all models.
  mTfliteInterpreter->SetAllowFp16PrecisionForFp32(true);

  if (use_nnapi) {
    tflite::StatefulNnApiDelegate::Options nnapi_options;
    nnapi_options.accelerator_name = nnapi_device_name;
    mTfliteNnapiDelegate = std::make_unique<tflite::StatefulNnApiDelegate>(nnapi_options);
    int delegationStatus = mTfliteInterpreter->ModifyGraphWithDelegate(mTfliteNnapiDelegate.get());
    *nnapiErrno = mTfliteNnapiDelegate->GetNnApiErrno();
    if (delegationStatus != kTfLiteOk ||
        *nnapiErrno != ANEURALNETWORKS_NO_ERROR) {
      __android_log_print(
          ANDROID_LOG_ERROR, LOG_TAG,
          "Failed to initialize NNAPI Delegate for model %s, nnapi_errno is %d",
          modelfile, *nnapiErrno);
      return false;
    }
  }
  return true;
}

BenchmarkModel::BenchmarkModel() {}
BenchmarkModel::~BenchmarkModel() {}

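// Copies |length| bytes of raw input data into the interpreter's first (and
// only expected) input tensor. Only float32 and uint8 input tensors are
// supported; any other tensor type is rejected.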
bool BenchmarkModel::setInput(const uint8_t* dataPtr, size_t length) {
  int input = mTfliteInterpreter->inputs()[0];
  auto* input_tensor = mTfliteInterpreter->tensor(input);

  switch (input_tensor->type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8: {
      void* raw = input_tensor->data.raw;
      memcpy(raw, dataPtr, length);
      break;
    }
    default:
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "Input tensor type not supported");
      return false;
  }
  return true;
}
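// Appends the raw bytes of the output tensor at |output_index| to the
// corresponding buffer in |result| so callers can inspect or persist them.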
void BenchmarkModel::saveInferenceOutput(InferenceResult* result,
                                         int output_index) {
  int output = mTfliteInterpreter->outputs()[output_index];
  auto* output_tensor = mTfliteInterpreter->tensor(output);
  auto& sink = result->inferenceOutputs[output_index];
  sink.insert(sink.end(), output_tensor->data.uint8,
              output_tensor->data.uint8 + output_tensor->bytes);
}

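// Compares the output tensor at |output_index| against |expected_data| and
// stores the mean square error and the largest single-element error in
// |result|. Aborts if the expected and actual byte sizes differ.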
void BenchmarkModel::getOutputError(const uint8_t* expected_data, size_t length,
                                    InferenceResult* result, int output_index) {
  int output = mTfliteInterpreter->outputs()[output_index];
  auto* output_tensor = mTfliteInterpreter->tensor(output);
  if (output_tensor->bytes != length) {
    FATAL("Wrong size of output tensor, expected %zu, is %zu", length,
          output_tensor->bytes);
  }

  size_t elements_count = 0;
  float err_sum = 0.0;
  float max_error = 0.0;
  switch (output_tensor->type) {
    case kTfLiteUInt8: {
      uint8_t* output_raw = mTfliteInterpreter->typed_tensor<uint8_t>(output);
      elements_count = output_tensor->bytes;
      for (size_t i = 0; i < output_tensor->bytes; ++i) {
        float err = ((float)output_raw[i]) - ((float)expected_data[i]);
        if (err > max_error) max_error = err;
        err_sum += err * err;
      }
      break;
    }
    case kTfLiteFloat32: {
      const float* expected = reinterpret_cast<const float*>(expected_data);
      float* output_raw = mTfliteInterpreter->typed_tensor<float>(output);
      elements_count = output_tensor->bytes / sizeof(float);
      for (size_t i = 0; i < output_tensor->bytes / sizeof(float); ++i) {
        float err = output_raw[i] - expected[i];
        if (err > max_error) max_error = err;
        err_sum += err * err;
      }
      break;
    }
    default:
      FATAL("Output tensor type %d not supported", output_tensor->type);
  }
  result->meanSquareErrors[output_index] = err_sum / elements_count;
  result->maxSingleErrors[output_index] = max_error;
}

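// Resizes the model's input tensor to |shape| and reallocates all tensors;
// returns false if allocation fails.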
bool BenchmarkModel::resizeInputTensors(std::vector<int> shape) {
  // The benchmark only expects a single input tensor, hardcoded as 0.
  int input = mTfliteInterpreter->inputs()[0];
  mTfliteInterpreter->ResizeInputTensor(input, shape);
  if (mTfliteInterpreter->AllocateTensors() != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to allocate tensors!");
    return false;
  }
  return true;
}

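// Invokes the interpreter once. When the NNAPI delegate is active, its errno
// is checked as well so that delegate-side failures are reported.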
bool BenchmarkModel::runInference() {
  auto status = mTfliteInterpreter->Invoke();
  auto nnapi_errno = mTfliteNnapiDelegate
                         ? mTfliteNnapiDelegate->GetNnApiErrno()
                         : ANEURALNETWORKS_NO_ERROR;
  if (status != kTfLiteOk || nnapi_errno != ANEURALNETWORKS_NO_ERROR) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to invoke, tflite status: %d, nnapi errno: %d!",
                        (int)status, nnapi_errno);
    return false;
  }
  return true;
}

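// Resets all variable tensors (state carried between invocations, e.g. for
// recurrent models) so each inference sequence starts from a clean state.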
bool BenchmarkModel::resetStates() {
  auto status = mTfliteInterpreter->ResetVariableTensors();
  if (status != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to reset variable tensors: %d!", (int)status);
    return false;
  }
  return true;
}

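// Runs up to |seqInferencesMaxCount| input/output sequences, cycling through
// |inOutData|, and appends one InferenceResult (latency plus, depending on
// |flags|, error metrics and captured outputs) per inference to |results|.
// |timeout| is a soft limit, in seconds, on the accumulated inference time.
//
// Typical call (sketch only; the real call sites live elsewhere in the
// benchmark harness):
//   std::vector<InferenceResult> results;
//   model->benchmark(data, /*seqInferencesMaxCount=*/100, /*timeout=*/10.0f,
//                    /*flags=*/0, &results);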
bool BenchmarkModel::benchmark(
    const std::vector<InferenceInOutSequence>& inOutData,
    int seqInferencesMaxCount, float timeout, int flags,
    std::vector<InferenceResult>* results) {
  if (inOutData.empty()) {
    __android_log_print(ANDROID_LOG_WARN, LOG_TAG,
                        "Input/output vector is empty");
    return true;
  }

  float inferenceTotal = 0.0;
  for (int seqInferenceIndex = 0; seqInferenceIndex < seqInferencesMaxCount;
       ++seqInferenceIndex) {
    resetStates();

    const int inputOutputSequenceIndex = seqInferenceIndex % inOutData.size();
    const InferenceInOutSequence& seq = inOutData[inputOutputSequenceIndex];
    for (int i = 0; i < seq.size(); ++i) {
      const InferenceInOut& data = seq[i];

      // For NNAPI systrace usage documentation, see
      // frameworks/ml/nn/common/include/Tracing.h.
      kTraceFunc.ATrace_beginSection("[NN_LA_PE]BenchmarkModel::benchmark");
      kTraceFunc.ATrace_beginSection("[NN_LA_PIO]BenchmarkModel::input");
      if (data.input) {
        setInput(data.input, data.input_size);
      } else {
        int input = mTfliteInterpreter->inputs()[0];
        auto* input_tensor = mTfliteInterpreter->tensor(input);
        if (!data.createInput((uint8_t*)input_tensor->data.raw,
                              input_tensor->bytes)) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "Input creation %d failed", i);
          return false;
        }
      }
      kTraceFunc.ATrace_endSection();
      long long startTime = currentTimeInUsec();
      const bool success = runInference();
      kTraceFunc.ATrace_endSection();
      long long endTime = currentTimeInUsec();
      if (!success) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Inference %d failed",
                            i);
        return false;
      }

      float inferenceTime =
          static_cast<float>(endTime - startTime) / 1000000.0f;
      size_t outputsCount = mTfliteInterpreter->outputs().size();
      InferenceResult result{
          inferenceTime, {}, {}, {}, inputOutputSequenceIndex, i};
      result.meanSquareErrors.resize(outputsCount);
      result.maxSingleErrors.resize(outputsCount);
      result.inferenceOutputs.resize(outputsCount);

      if ((flags & FLAG_IGNORE_GOLDEN_OUTPUT) == 0) {
        if (outputsCount != data.outputs.size()) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "Golden/actual outputs (%zu/%zu) count mismatch",
                              data.outputs.size(), outputsCount);
          return false;
        }
        for (int j = 0; j < outputsCount; ++j) {
          getOutputError(data.outputs[j].ptr, data.outputs[j].size, &result, j);
        }
      }

      if ((flags & FLAG_DISCARD_INFERENCE_OUTPUT) == 0) {
        for (int j = 0; j < outputsCount; ++j) {
          saveInferenceOutput(&result, j);
        }
      }
      results->push_back(result);
      inferenceTotal += inferenceTime;
    }

    // Timeout?
    if (timeout > 0.001 && inferenceTotal > timeout) {
      return true;
    }
  }
  return true;
}

// If cacheDir is not nullptr, compilation caching will be used with NNAPI.
bool BenchmarkModel::runCompilation(const char* cacheDir) {
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(*mTfliteModel, resolver)(&interpreter);
  if (!interpreter) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to create TFlite interpreter");
    return false;
  }

  // Allow Fp16 precision for all models.
  interpreter->SetAllowFp16PrecisionForFp32(true);

  if (mUseNnApi) {
    tflite::StatefulNnApiDelegate::Options nnapi_options;
    nnapi_options.accelerator_name = mNnApiDeviceName.empty() ? nullptr : mNnApiDeviceName.c_str();
    if (cacheDir) {
      nnapi_options.cache_dir = cacheDir;
      nnapi_options.model_token = mModelFile.c_str();
    }
    tflite::StatefulNnApiDelegate delegate(nnapi_options);
    int delegationStatus = interpreter->ModifyGraphWithDelegate(&delegate);
    auto nnapiErrno = delegate.GetNnApiErrno();
    if (delegationStatus != kTfLiteOk || nnapiErrno != ANEURALNETWORKS_NO_ERROR) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "Failed to initialize NNAPI Delegate for model %s, nnapi_errno is %d",
                          mModelFile.c_str(), nnapiErrno);
      return false;
    }
  }
  return true;
}

// A helper class to manage the lifetime of a temporary cache directory.
class ScopedTempDirectory {
 public:
  ScopedTempDirectory(std::string base) : mBase(std::move(base)) {}
  ~ScopedTempDirectory() { cleanup(); }

  // Create a new temp directory, remove the old one if needed.
  void recreate() {
    cleanup();
    mTempDir = mBase + "/XXXXXX";
    mkdtemp(&mTempDir[0]);
  }

  // Get the path to the temp directory.
  const char* get() const { return mTempDir.empty() ? nullptr : mTempDir.c_str(); }

 private:
  void cleanup() {
    if (mTempDir.empty()) {
      return;
    }
    auto callback = [](const char* entry, const struct stat*, int, struct FTW*) {
      return remove(entry);
    };
    nftw(mTempDir.c_str(), callback, 128, FTW_DEPTH | FTW_MOUNT | FTW_PHYS);
    mTempDir.clear();
  }

  std::string mBase;
  std::string mTempDir;
};

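// Compiles the model once with caching enabled into a fresh temporary
// directory, then reports the total size, in bytes, of the regular files the
// compilation wrote there.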
bool BenchmarkModel::getCompilationCacheSize(int* cacheSizeBytes) {
  if (cacheSizeBytes == nullptr) return false;

  // Create cache files.
  ScopedTempDirectory tempDir(mCacheDir.value());
  tempDir.recreate();
  const bool success = runCompilation(tempDir.get());
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Save to cache failed");
    return false;
  }

  // Compute total size of cache files.
  int totalSize = 0;
  DIR* dir = opendir(tempDir.get());
  if (dir == nullptr) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to open cache directory");
    return false;
  }
  struct dirent* dp = nullptr;
  while ((dp = readdir(dir)) != nullptr) {
    char fullPath[1024];
    snprintf(fullPath, 1024, "%s/%s", tempDir.get(), dp->d_name);
    struct stat st;
    int err = stat(fullPath, &st);
    if (err != 0) {
      closedir(dir);
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to stat %s", fullPath);
      return false;
    }
    // Only accumulate sizes of regular files. This will exclude '.' and '..'.
    if (S_ISREG(st.st_mode)) {
      totalSize += st.st_size;
    }
  }
  closedir(dir);
  *cacheSizeBytes = totalSize;
  return true;
}

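// Measures compilation time for one benchmark mode: plain compilation
// (WITHOUT_CACHE), compilation that writes cache files (SAVE_TO_CACHE), or
// compilation that reads previously written cache files (PREPARE_FROM_CACHE).
// Runs up to |maxNumIterations| iterations, or until |timeout| seconds of
// accumulated compilation time, and appends per-iteration times to |results|
// when it is non-null.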
bool BenchmarkModel::benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type,
                                                      int maxNumIterations, float timeout,
                                                      std::vector<float>* results) {
  if (results != nullptr) {
    results->clear();
  }
  ScopedTempDirectory tempDir(mCacheDir.value());

  // Initialize cache files to benchmark cache hit.
  if (type == CompilationBenchmarkType::PREPARE_FROM_CACHE) {
    tempDir.recreate();
    const bool success = runCompilation(tempDir.get());
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Save to cache failed");
      return false;
    }
  }

  float compilationTotal = 0.0;
  for (int i = 0; i < maxNumIterations; i++) {
    const char* cacheDir = nullptr;
    switch (type) {
      case CompilationBenchmarkType::WITHOUT_CACHE:
        cacheDir = nullptr;
        break;
      case CompilationBenchmarkType::SAVE_TO_CACHE:
        // Remove the cache files from the last iteration to benchmark cache miss.
        tempDir.recreate();
        [[fallthrough]];
      case CompilationBenchmarkType::PREPARE_FROM_CACHE:
        cacheDir = tempDir.get();
        break;
      default:
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Unknown CompilationBenchmarkType: %d",
                            static_cast<int>(type));
        return false;
    }

    kTraceFunc.ATrace_beginSection("[NN_LA_PC]BenchmarkModel::benchmarkCompilation");
    const long long startTime = currentTimeInUsec();
    const bool success = runCompilation(cacheDir);
    const long long endTime = currentTimeInUsec();
    kTraceFunc.ATrace_endSection();
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Compilation %d failed", i);
      return false;
    }

    const float compilationTime = static_cast<float>(endTime - startTime) / 1000000.0f;
    if (results != nullptr) {
      results->push_back(compilationTime);
    }

    // Timeout?
    compilationTotal += compilationTime;
    if (timeout > 0.001 && compilationTotal > timeout) {
      return true;
    }
  }
  return true;
}

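// Same as benchmarkSingleTypeOfCompilation(), but first runs a warmup pass
// whose timings are discarded before the measured pass.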
bool BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                                int maxNumIterations,
                                                                float warmupTimeout,
                                                                float runTimeout,
                                                                std::vector<float>* results) {
  kTraceFunc.ATrace_beginSection(
          "[NN_LA_PWM]BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup");
  bool success = benchmarkSingleTypeOfCompilation(type, maxNumIterations, warmupTimeout, nullptr);
  kTraceFunc.ATrace_endSection();
  if (!success) return false;

  kTraceFunc.ATrace_beginSection(
          "[NN_LA_PBM]BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup");
  success = benchmarkSingleTypeOfCompilation(type, maxNumIterations, runTimeout, results);
  kTraceFunc.ATrace_endSection();
  return success;
}

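// Top-level compilation benchmark: measures compilation without cache, then
// the compilation cache size, and, only when cache files are actually
// produced, the save-to-cache and prepare-from-cache times.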
bool BenchmarkModel::benchmarkCompilation(int maxNumIterations, float warmupTimeout,
                                          float runTimeout, CompilationBenchmarkResult* result) {
  if (result == nullptr) return false;

  // Benchmark compile without cache.
  bool success = benchmarkSingleTypeOfCompilationWithWarmup(
          CompilationBenchmarkType::WITHOUT_CACHE, maxNumIterations, warmupTimeout, runTimeout,
          &result->compileWithoutCacheTimeSec);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to benchmark compilation without cache");
    return false;
  }

  // Get compilation cache size.
  success = getCompilationCacheSize(&result->cacheSizeBytes);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to retrieve compilation cache size");
    return false;
  }

  // Benchmark saving to cache and preparing from cache only if supported.
  if (result->cacheSizeBytes > 0) {
    // Benchmark saving to cache.
    auto& saveToCacheTimeSec = result->saveToCacheTimeSec.emplace();
    success = benchmarkSingleTypeOfCompilationWithWarmup(
            CompilationBenchmarkType::SAVE_TO_CACHE, maxNumIterations, warmupTimeout, runTimeout,
            &saveToCacheTimeSec);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to benchmark saving to cache");
      return false;
    }

    // Benchmark preparing from cache.
    auto& prepareFromCacheTimeSec = result->prepareFromCacheTimeSec.emplace();
    success = benchmarkSingleTypeOfCompilationWithWarmup(
            CompilationBenchmarkType::PREPARE_FROM_CACHE, maxNumIterations, warmupTimeout,
            runTimeout, &prepareFromCacheTimeSec);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to benchmark preparing from cache");
      return false;
    }
  }
  return true;
}

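// Runs every sequence in |inOutData| once and writes the raw contents of each
// dumped tensor to a file under |path|. The set of tensors to dump is the
// |outputs| list populated by init() when enable_intermediate_tensors_dump is
// true (i.e. the output of every op).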
bool BenchmarkModel::dumpAllLayers(
    const char* path, const std::vector<InferenceInOutSequence>& inOutData) {
  if (inOutData.empty()) {
    FATAL("Input/output vector is empty");
  }

  for (int seqInferenceIndex = 0; seqInferenceIndex < inOutData.size();
       ++seqInferenceIndex) {
    resetStates();

    const InferenceInOutSequence& seq = inOutData[seqInferenceIndex];
    for (int i = 0; i < seq.size(); ++i) {
      const InferenceInOut& data = seq[i];
      setInput(data.input, data.input_size);
      const bool success = runInference();
      if (!success) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Inference %d failed",
                            i);
        return false;
      }

      // Tensors are dumped in the order they were collected in init(), which
      // is not sorted by tensor index.
      for (int tensor_order = 0; tensor_order < outputs.size(); ++tensor_order) {
        int tensor_index = outputs[tensor_order];
        auto* output_tensor = mTfliteInterpreter->tensor(tensor_index);
        if (output_tensor->data.raw == nullptr) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                      "output_tensor->data.raw == nullptr at index %d ", tensor_index);
          continue;
        }
        char fullpath[1024];
        snprintf(fullpath, 1024, "%s/dump_%.3d_seq_%.3d_order_%.3d_tensor_%.3d", path,
                 seqInferenceIndex, i, tensor_order, tensor_index);
        FILE* f = fopen(fullpath, "wb");
        if (f == nullptr) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to open %s for writing",
                              fullpath);
          return false;
        }
        fwrite(output_tensor->data.raw, output_tensor->bytes, 1, f);
        fclose(f);
      }
    }
  }
  return true;
}