/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
#define COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/model.h"

#include <unistd.h>

#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <vector>

struct InferenceOutput {
  uint8_t* ptr;
  size_t size;
};

// Inputs and expected outputs for inference
struct InferenceInOut {
  // Input can either be specified directly as a pointer or indirectly via
  // the createInput callback. The callback form is needed for large datasets
  // where allocating memory for all inputs at once is not feasible; see the
  // sketch below this struct.
  uint8_t* input;
  size_t input_size;

  std::vector<InferenceOutput> outputs;
  std::function<bool(uint8_t*, size_t)> createInput;
};
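
// Illustrative sketch (not part of this header's API): an InferenceInOut can
// carry its input directly as a pointer, or defer it to the createInput
// callback when the whole dataset cannot be held in memory at once. The
// buffer names and readNextSample() below are hypothetical.
//
//   // Direct input: data and golden output already resident in memory.
//   InferenceInOut direct{inputBuf, inputLen, {{goldenBuf, goldenLen}}, nullptr};
//
//   // Deferred input: the callback fills the provided buffer on demand and
//   // returns false on failure.
//   InferenceInOut deferred{nullptr, expectedLen, {},
//                           [](uint8_t* buf, size_t len) {
//                             return readNextSample(buf, len);
//                           }};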

// Inputs and expected outputs for an inference sequence.
using InferenceInOutSequence = std::vector<InferenceInOut>;

// Result of a single inference
struct InferenceResult {
  float computeTimeSec;
  // MSE for each output
  std::vector<float> meanSquareErrors;
  // Max single error for each output
  std::vector<float> maxSingleErrors;
  // Outputs
  std::vector<std::vector<uint8_t>> inferenceOutputs;
  int inputOutputSequenceIndex;
  int inputOutputIndex;
};

struct CompilationBenchmarkResult {
  std::vector<float> compileWithoutCacheTimeSec;
  // The following optional fields have no value if compilation caching is not supported.
  std::optional<std::vector<float>> saveToCacheTimeSec;
  std::optional<std::vector<float>> prepareFromCacheTimeSec;
  // The total size of the cache files. It is zero if compilation caching is not supported.
  int cacheSizeBytes = 0;
};
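
// Illustrative sketch (assumptions: `model` is a BenchmarkModel* obtained from
// BenchmarkModel::create, and the iteration/timeout values are arbitrary): the
// optional vectors are only populated when the driver supports compilation
// caching, so check has_value() before reading them.
//
//   CompilationBenchmarkResult res;
//   if (model->benchmarkCompilation(/*maxNumIterations=*/10,
//                                   /*warmupTimeout=*/1.0f,
//                                   /*runTimeout=*/10.0f, &res)) {
//     if (res.saveToCacheTimeSec.has_value()) {
//       // Caching is supported; res.cacheSizeBytes is meaningful here.
//     }
//   }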

/** Discard inference output in inference results. */
const int FLAG_DISCARD_INFERENCE_OUTPUT = 1 << 0;
/** Do not expect golden output for inference inputs. */
const int FLAG_IGNORE_GOLDEN_OUTPUT = 1 << 1;

enum class CompilationBenchmarkType {
  // Benchmark without cache
  WITHOUT_CACHE,
  // Benchmark cache miss
  SAVE_TO_CACHE,
  // Benchmark cache hit
  PREPARE_FROM_CACHE,
};

class BenchmarkModel {
 public:
  ~BenchmarkModel();

  static BenchmarkModel* create(const char* modelfile, bool use_nnapi,
                                bool enable_intermediate_tensors_dump,
                                int* nnapiErrno, const char* nnapi_device_name,
                                bool mmapModel, const char* nnapi_cache_dir);

  bool resizeInputTensors(std::vector<int> shape);
  bool setInput(const uint8_t* dataPtr, size_t length);
  bool runInference();
  // Resets TFLite states (RNN/LSTM states, etc.).
  bool resetStates();

  bool benchmark(const std::vector<InferenceInOutSequence>& inOutData,
                 int seqInferencesMaxCount, float timeout, int flags,
                 std::vector<InferenceResult>* result);

  bool benchmarkCompilation(int maxNumIterations, float warmupTimeout, float runTimeout,
                            CompilationBenchmarkResult* result);

  bool dumpAllLayers(const char* path,
                     const std::vector<InferenceInOutSequence>& inOutData);

 private:
  BenchmarkModel();
  bool init(const char* modelfile, bool use_nnapi,
            bool enable_intermediate_tensors_dump,
            int* nnapiErrno, const char* nnapi_device_name,
            /* Flag to choose between memory-mapping the model and
               initializing it from the program's memory. */
            bool mmapModel,
            const char* nnapi_cache_dir);

  void getOutputError(const uint8_t* dataPtr, size_t length,
                      InferenceResult* result, int output_index);
  void saveInferenceOutput(InferenceResult* result, int output_index);

  bool runCompilation(const char* cacheDir);
  bool benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type, int maxNumIterations,
                                        float timeout, std::vector<float>* results);
  bool benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                  int maxNumIterations, float warmupTimeout,
                                                  float runTimeout, std::vector<float>* results);
  bool getCompilationCacheSize(int* cacheSizeBytes);

  std::string mModelBuffer;
  std::unique_ptr<tflite::FlatBufferModel> mTfliteModel;
  std::unique_ptr<tflite::Interpreter> mTfliteInterpreter;
  std::unique_ptr<tflite::StatefulNnApiDelegate> mTfliteNnapiDelegate;
  // Store indices of output tensors, used to dump intermediate tensors
  std::vector<int> outputs;

  // Parameters for compilation
  std::string mModelFile;
  bool mUseNnApi;
  std::optional<std::string> mCacheDir;
  std::string mNnApiDeviceName;
};
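
// Illustrative usage sketch (assumptions: the model path is an example, the
// input/golden data in `inOutData` are prepared by the caller, and error
// handling is elided):
//
//   int nnapiErrno = 0;
//   std::unique_ptr<BenchmarkModel> model(BenchmarkModel::create(
//       "/data/local/tmp/model.tflite", /*use_nnapi=*/true,
//       /*enable_intermediate_tensors_dump=*/false, &nnapiErrno,
//       /*nnapi_device_name=*/nullptr, /*mmapModel=*/true,
//       /*nnapi_cache_dir=*/nullptr));
//
//   std::vector<InferenceInOutSequence> inOutData = ...;  // caller-provided
//   std::vector<InferenceResult> results;
//   model->benchmark(inOutData, /*seqInferencesMaxCount=*/100,
//                    /*timeout=*/10.0f,
//                    FLAG_DISCARD_INFERENCE_OUTPUT | FLAG_IGNORE_GOLDEN_OUTPUT,
//                    &results);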

#endif  // COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H