/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
#define COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/model.h"

#include <unistd.h>

#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <vector>

struct InferenceOutput {
  uint8_t* ptr;
  size_t size;
};

// Inputs and expected outputs for inference.
struct InferenceInOut {
  // The input can either be specified directly as a pointer or indirectly via
  // the createInput callback. The callback form is needed for large datasets
  // where allocating memory for all inputs at once is not feasible.
  uint8_t* input;
  size_t input_size;

  std::vector<InferenceOutput> outputs;
  std::function<bool(uint8_t*, size_t)> createInput;
};

// Inputs and expected outputs for an inference sequence.
using InferenceInOutSequence = std::vector<InferenceInOut>;

// Result of a single inference.
struct InferenceResult {
  float computeTimeSec;
  // Mean squared error (MSE) for each output.
  std::vector<float> meanSquareErrors;
  // Maximum single error for each output.
  std::vector<float> maxSingleErrors;
  // Raw output buffers.
  std::vector<std::vector<uint8_t>> inferenceOutputs;
  int inputOutputSequenceIndex;
  int inputOutputIndex;
};

struct CompilationBenchmarkResult {
  std::vector<float> compileWithoutCacheTimeSec;
  // The following optional fields have no value if compilation caching is not
  // supported.
  std::optional<std::vector<float>> saveToCacheTimeSec;
  std::optional<std::vector<float>> prepareFromCacheTimeSec;
  // The total size of the cache files. Zero if compilation caching is not
  // supported.
  int cacheSizeBytes = 0;
};

/** Discard inference output in inference results. */
const int FLAG_DISCARD_INFERENCE_OUTPUT = 1 << 0;
/** Do not expect golden outputs for inference inputs. */
const int FLAG_IGNORE_GOLDEN_OUTPUT = 1 << 1;

enum class CompilationBenchmarkType {
  // Benchmark compilation without a cache.
  WITHOUT_CACHE,
  // Benchmark a cache miss (compile and save to cache).
  SAVE_TO_CACHE,
  // Benchmark a cache hit (prepare from cache).
  PREPARE_FROM_CACHE,
};

class BenchmarkModel {
 public:
  ~BenchmarkModel();

  static BenchmarkModel* create(const char* modelfile, bool use_nnapi,
                                bool enable_intermediate_tensors_dump,
                                int* nnapiErrno, const char* nnapi_device_name,
                                bool mmapModel, const char* nnapi_cache_dir);

  bool resizeInputTensors(std::vector<int> shape);
  bool setInput(const uint8_t* dataPtr, size_t length);
  bool runInference();
  // Resets TFLite states (RNN/LSTM states etc.).
  bool resetStates();

  bool benchmark(const std::vector<InferenceInOutSequence>& inOutData,
                 int seqInferencesMaxCount, float timeout, int flags,
                 std::vector<InferenceResult>* result);

  bool benchmarkCompilation(int maxNumIterations, float warmupTimeout,
                            float runTimeout,
                            CompilationBenchmarkResult* result);

  bool dumpAllLayers(const char* path,
                     const std::vector<InferenceInOutSequence>& inOutData);

 private:
  BenchmarkModel();
  bool init(const char* modelfile, bool use_nnapi,
            bool enable_intermediate_tensors_dump, int* nnapiErrno,
            const char* nnapi_device_name,
            // Flag to choose between memory-mapping the model and initializing
            // the model from the program's memory.
            bool mmapModel, const char* nnapi_cache_dir);

  void getOutputError(const uint8_t* dataPtr, size_t length,
                      InferenceResult* result, int output_index);
  void saveInferenceOutput(InferenceResult* result, int output_index);

  bool runCompilation(const char* cacheDir);
  bool benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type,
                                        int maxNumIterations, float timeout,
                                        std::vector<float>* results);
  bool benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                  int maxNumIterations,
                                                  float warmupTimeout,
                                                  float runTimeout,
                                                  std::vector<float>* results);
  bool getCompilationCacheSize(int* cacheSizeBytes);

  std::string mModelBuffer;
  std::unique_ptr<tflite::FlatBufferModel> mTfliteModel;
  std::unique_ptr<tflite::Interpreter> mTfliteInterpreter;
  std::unique_ptr<tflite::StatefulNnApiDelegate> mTfliteNnapiDelegate;
  // Indices of output tensors, used to dump intermediate tensors.
  std::vector<int> outputs;

  // Parameters for compilation.
  std::string mModelFile;
  bool mUseNnApi;
  std::optional<std::string> mCacheDir;
  std::string mNnApiDeviceName;
};

#endif  // COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
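
// Illustrative usage sketch of the BenchmarkModel API declared above. This is
// a comment-only example: the model path, flag values, timeouts, and cache
// directory are placeholder assumptions, not values taken from the benchmark
// itself, and error handling is abbreviated.
//
//   int nnapiErrno = 0;
//   std::unique_ptr<BenchmarkModel> model(BenchmarkModel::create(
//       "/data/local/tmp/model.tflite", /*use_nnapi=*/true,
//       /*enable_intermediate_tensors_dump=*/false, &nnapiErrno,
//       /*nnapi_device_name=*/nullptr, /*mmapModel=*/true,
//       /*nnapi_cache_dir=*/"/data/local/tmp"));
//   if (model != nullptr && nnapiErrno == 0) {
//     // inOutData: a std::vector<InferenceInOutSequence> prepared by the caller.
//     std::vector<InferenceResult> results;
//     model->benchmark(inOutData, /*seqInferencesMaxCount=*/100,
//                      /*timeout=*/10.0f, FLAG_DISCARD_INFERENCE_OUTPUT,
//                      &results);
//
//     CompilationBenchmarkResult compilationResult;
//     model->benchmarkCompilation(/*maxNumIterations=*/10,
//                                 /*warmupTimeout=*/1.0f, /*runTimeout=*/5.0f,
//                                 &compilationResult);
//   }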