/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_EXECUTION_BUILDER_H
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_EXECUTION_BUILDER_H

#include <atomic>
#include <memory>
#include <optional>
#include <tuple>
#include <utility>
#include <vector>

#include "Callbacks.h"
#include "ControlFlow.h"
#include "CpuExecutor.h"
#include "HalInterfaces.h"
#include "Memory.h"
#include "ModelArgumentInfo.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"

namespace android {
namespace nn {

class BurstBuilder;
class CompilationBuilder;
class Device;
class ExecutionBurstController;
class ExecutionPlan;
class ExecutionStep;
class Memory;
class ModelBuilder;
class PreparedModel;
class StepExecutor;

class ExecutionBuilder {
    friend class StepExecutor;

   public:
    ExecutionBuilder(const CompilationBuilder* compilation);

    int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer,
                 size_t length);
    int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                           const Memory* memory, size_t offset, size_t length);
    int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
                  size_t length);
    int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                            const Memory* memory, size_t offset, size_t length);

    int setMeasureTiming(bool measure);

    int getDuration(int32_t durationCode, uint64_t* duration) const;

    int setTimeoutDuration(uint64_t duration);

    std::optional<uint64_t> getTimeoutDuration() const;

    int setLoopTimeout(uint64_t duration);

    uint64_t getLoopTimeoutDuration() const { return mLoopTimeoutDuration; }

    int computeFenced(const std::vector<int>& wait_for, uint64_t timeoutDurationAfterFence,
                      int* sync_fence);

    int computeAsynchronously(sp<ExecutionCallback>* synchronizationCallback) {
        CHECK(synchronizationCallback != nullptr);
        return compute(synchronizationCallback);
    }
    int computeSynchronously() { return compute(nullptr); }
    int burstCompute(BurstBuilder* burst) { return compute(nullptr, burst); }
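
    // A minimal usage sketch of the three compute paths above (illustrative
    // comment only, not part of this header's API). In practice these methods
    // are reached through the NNAPI C entry points such as
    // ANeuralNetworksExecution_compute; the buffer and variable names here are
    // hypothetical:
    //
    //   execution->setInput(0, /*type=*/nullptr, inputBuffer, inputLength);
    //   execution->setOutput(0, /*type=*/nullptr, outputBuffer, outputLength);
    //   int n = execution->computeSynchronously();        // blocking
    //   // or:
    //   sp<ExecutionCallback> callback;
    //   n = execution->computeAsynchronously(&callback);  // returns early
    //   // or, with a previously configured BurstBuilder:
    //   n = execution->burstCompute(burst);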

    // Initialize output dimensional information from ModelArgumentInfo.
    std::vector<hal::OutputShape> getInitialOutputShapes() const;

    int getOutputOperandDimensions(uint32_t index, uint32_t* dimensions);
    int getOutputOperandRank(uint32_t index, uint32_t* rank);

    // Handshake with lower-level execution support.
    bool measureTiming() const { return mMeasureTiming; }
    void reportTimingWithoutFencedExecutionCallback(hal::Timing timing) {
        mTimingWithoutFencedExecutionCallback = timing;
    }

    const CompilationBuilder* getCompilation() const { return mCompilation; }
    const ModelBuilder* getModel() const { return mModel; }
    const ModelBuilder* getSourceModel(uint32_t index) const;
    const hal::Operand& getSourceOperand(
            const std::pair<uint32_t, uint32_t>& sourceOperandIndex) const {
        return getSourceModel(sourceOperandIndex.first)->getOperand(sourceOperandIndex.second);
    }

    hal::ErrorStatus finishWithoutSyncFence(hal::ErrorStatus error,
                                            const std::vector<hal::OutputShape>& outputShapes);

    // Retrieve a reference to the IFencedExecutionCallback callback.
    const sp<hal::IFencedExecutionCallback>& getFencedExecutionCallback() {
        return mFencedExecutionCallback;
    }

    bool inFlight() const { return mStarted && !isFinished(); }

    const ModelArgumentInfo& getInputInfo(uint32_t index) const { return mInputs[index]; }
    const ModelArgumentInfo& getOutputInfo(uint32_t index) const { return mOutputs[index]; }

    std::optional<RunTimePoolInfo> getRunTimePoolInfo(uint32_t poolIndex) const {
        return mMemories[poolIndex]->getRunTimePoolInfo();
    }
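
    // Hypothetical sketch of the query pattern served by
    // getOutputOperandRank()/getOutputOperandDimensions() above: even when an
    // execution finishes with ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE, the
    // actual output shapes remain queryable, so a caller can resize its
    // buffers and retry. (Illustrative comment only; "execution" and "dims"
    // are hypothetical names.)
    //
    //   uint32_t rank = 0;
    //   int n = execution->getOutputOperandRank(0, &rank);
    //   if (n == ANEURALNETWORKS_NO_ERROR) {
    //       std::vector<uint32_t> dims(rank);
    //       n = execution->getOutputOperandDimensions(0, dims.data());
    //   }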

   private:
    // If a callback is provided, then this is asynchronous. If a callback is
    // not provided (i.e., is nullptr), then this is synchronous.
    //
    // If burst is provided, then the burst path will be used. If a burst is
    // not provided (i.e., is nullptr), then a synchronous execution will
    // occur.
    //
    // Providing both synchronizationCallback and burstBuilder is an error.
    int compute(sp<ExecutionCallback>* synchronizationCallback,
                BurstBuilder* burstBuilder = nullptr);

    const CompilationBuilder* mCompilation;

    // Update output dimensional information from OutputShape to ModelArgumentInfo.
    bool updateOutputShapes(const std::vector<hal::OutputShape>& outputShapes);

    bool updateMemories();

    bool hasSyncFence() const { return mSyncFenceFd > 0; }

    const ModelBuilder* mModel;
    const ExecutionPlan* mPlan;

    // This is a DeviceManager::kPartitioning* value captured from
    // CompilationBuilder when the ExecutionBuilder is constructed.
    uint32_t mPartitioning;

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store it
    //    instead in mInputBuffers or mOutputBuffers.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the m*Buffers entries. Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers. We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;

    // Do we ask the driver to measure timing?
    bool mMeasureTiming = false;

    // Timing reported from the driver. This field is only used if
    // mFencedExecutionCallback is nullptr.
    hal::Timing mTimingWithoutFencedExecutionCallback = {};

    // Amount of time to complete or abort the execution.
    std::optional<uint64_t> mTimeoutDuration;

    // Amount of time to complete or abort a loop.
    uint64_t mLoopTimeoutDuration = operation_while::kTimeoutNsDefault;

    // Properties cannot be set once the execution has started.
    std::atomic_bool mStarted = false;

    // Timing and output shapes can only be queried after the execution is
    // finished. This field only becomes true if !hasSyncFence().
    // See isFinished().
    std::atomic_bool mFinishedWithoutSyncFence = false;

    bool isFinished() const;

    // With what error status has execution completed? This field only takes on
    // a meaningful value if !hasSyncFence().
    // See completedWith().
    enum class Completion { NO_ERROR, OUTPUT_INSUFFICIENT_SIZE, OTHER_ERROR };
    Completion mCompletionWithoutSyncFence = Completion::OTHER_ERROR;

    // With what error status has execution completed? Must only be called if
    // isFinished().
    Completion completedWith() const;

    // The sync fence fd that is created in the computeFenced call, if any.
    // (Sometimes no sync fence fd will be created.)
    int mSyncFenceFd = -1;

    // The callback used to query execution related info in the case of fenced
    // execution; otherwise, nullptr. If the execution plan has multiple steps,
    // this is the callback associated with the last step. If the last step
    // doesn't support fenced execution (e.g., the driver is too old), or if
    // the launch of execution on the driver fails, then this callback will be
    // nullptr.
    sp<hal::IFencedExecutionCallback> mFencedExecutionCallback;
};
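
// Illustrative sketch (an assumption about the surrounding runtime, not code
// taken from this header): how StepExecutor below is typically wired up. For
// a single-"step" execution the whole model maps over trivially; for a
// multiple-"step" plan, each step maps its own inputs and outputs one at a
// time. Index variable names are hypothetical:
//
//   StepExecutor executor(executionBuilder, stepModel, device, preparedModel, step);
//   if (singleStep) {
//       executor.mapInputsAndOutputsTrivially();
//   } else {
//       executor.mapInput(builderInputIndex, executorInputIndex);
//       executor.mapOutput(builderOutputIndex, executorOutputIndex);
//   }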

// class StepExecutor is used to execute a single "step" in a
// potentially multiple step execution process. The graph associated
// with that step is executed in its entirety on a single device (or
// on the CPU).
class StepExecutor {
   public:
    // executionBuilder
    //     Describes the full (possibly multiple-"step") execution.
    // model
    //     The model to be executed by the executor. Possibly a single
    //     "step" model of a multiple-"step" executionBuilder.
    // driver, preparedModel
    //     The device on which to execute the "step", and the prepared
    //     model to execute on that device. (Both are nullptr in the
    //     case of CPU.)
    // step
    //     Contains the output index mapping from the excerpted "step" model
    //     to the main model if the execution has multiple "steps". Must be
    //     nullptr otherwise.
    StepExecutor(ExecutionBuilder* executionBuilder, const ModelBuilder* model,
                 std::shared_ptr<Device> device, std::shared_ptr<PreparedModel> preparedModel,
                 const ExecutionStep* step = nullptr);

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // in the case where we have a single-"step" execution (i.e., the executor
    // is executing the entire model from the ExecutionBuilder).
    void mapInputsAndOutputsTrivially();

    // Update output shapes with shapes returned from execution.
    bool updateOutputShapes(const std::vector<hal::OutputShape>& from,
                            std::vector<hal::OutputShape>* to);

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // one at a time. Note that these are input/output indexes, not
    // operand indexes.
    void mapInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex], &mInputs[executorIndex]);
    }
    void mapOutput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex], &mOutputs[executorIndex]);
    }
    void mapOutputToInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex], &mInputs[executorIndex]);
    }

    // The input or output is assumed to have the size of the
    // corresponding operand.
    int setInputFromMemory(uint32_t inputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromMemory(mModel->getInputOperand(inputIndex), memory, offset,
                                          &mInputs.at(inputIndex));
    }
    int setOutputFromMemory(uint32_t outputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromMemory(mModel->getOutputOperand(outputIndex), memory, offset,
                                          &mOutputs.at(outputIndex));
    }

    // Executes using the (driver, preparedModel) specified at construction time.
    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> compute(
            const std::optional<Deadline>& deadline,
            const std::shared_ptr<ExecutionBurstController>& burstController = nullptr);

    // Re-compiles and executes using the CPU, regardless of the (driver,
    // preparedModel) specified at construction time.
    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> computeOnCpuFallback();

    bool isCpu() const;
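
    // Hedged sketch of how the two entry points above combine (hypothetical
    // control flow, inferred from the declarations in this header rather than
    // taken from the runtime): try the device first, then fall back to the
    // CPU if the step was not already running there.
    //
    //   auto [n, outputShapes, timing] = executor.compute(deadline, burstController);
    //   if (n != ANEURALNETWORKS_NO_ERROR && !executor.isCpu()) {
    //       std::tie(n, outputShapes, timing) = executor.computeOnCpuFallback();
    //   }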

    // Perform fenced execution and return error_code, sync_fence_fd and a
    // callback.
    std::tuple<int, int, sp<hal::IFencedExecutionCallback>> computeFenced(
            const std::vector<int>& wait_for, uint64_t timeoutDurationAfterFence,
            const std::optional<Deadline>& deadline);

   private:
    void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                          ModelArgumentInfo* executorInputOrOutput);

    int setInputOrOutputFromMemory(const hal::Operand& inputOrOutputOperand, const Memory* memory,
                                   uint32_t offset, ModelArgumentInfo* inputOrOutputInfo);

    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> computeWithMemories(
            const std::optional<Deadline>& deadline, const std::vector<const Memory*>& memories,
            const std::shared_ptr<ExecutionBurstController>& burstController = nullptr);

    // describes the full (possibly multiple-"step") execution
    ExecutionBuilder* mExecutionBuilder;

    // describes the single execution step
    const ExecutionStep* mExecutionStep = nullptr;

    // model to be executed on the executor, in both original and
    // compiled forms; and device on which to execute it
    const ModelBuilder* mModel;
    std::shared_ptr<Device> mDevice;
    std::shared_ptr<PreparedModel> mPreparedModel;

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store it
    //    instead in mInputBuffers or mOutputBuffers.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the m*Buffers entries. Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers. We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};

}  // namespace nn
}  // namespace android

#endif  // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_EXECUTION_BUILDER_H