/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_EXECUTION_BUILDER_H
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_EXECUTION_BUILDER_H

#include <atomic>
#include <memory>
#include <optional>
#include <tuple>
#include <utility>
#include <vector>

#include "Callbacks.h"
#include "ControlFlow.h"
#include "CpuExecutor.h"
#include "HalInterfaces.h"
#include "Memory.h"
#include "ModelArgumentInfo.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"

namespace android {
namespace nn {

class BurstBuilder;
class CompilationBuilder;
class Device;
class ExecutionBurstController;
class ExecutionPlan;
class ExecutionStep;
class Memory;
class ModelBuilder;
class PreparedModel;
class StepExecutor;

class ExecutionBuilder {
    friend class StepExecutor;

   public:
    ExecutionBuilder(const CompilationBuilder* compilation);

    int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer,
                 size_t length);
    int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                           const Memory* memory, size_t offset, size_t length);
    int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
                  size_t length);
    int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                            const Memory* memory, size_t offset, size_t length);

    int setMeasureTiming(bool measure);

    int getDuration(int32_t durationCode, uint64_t* duration) const;
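
    // Timing query sketch (illustrative only; `execution` is a hypothetical
    // instance, and the duration code is one of the ANEURALNETWORKS_DURATION_*
    // values from NeuralNetworks.h).  Measurement must be requested before the
    // computation starts, and queried after it finishes:
    //     execution.setMeasureTiming(true);
    //     execution.computeSynchronously();
    //     uint64_t nanos;
    //     execution.getDuration(ANEURALNETWORKS_DURATION_ON_HARDWARE, &nanos);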

    int setTimeoutDuration(uint64_t duration);

    std::optional<uint64_t> getTimeoutDuration() const;

    int setLoopTimeout(uint64_t duration);

    uint64_t getLoopTimeoutDuration() const { return mLoopTimeoutDuration; }

    int computeFenced(const std::vector<int>& wait_for, uint64_t timeoutDurationAfterFence,
                      int* sync_fence);
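
    // Fenced launch sketch (illustrative only; `execution` is hypothetical, an
    // empty wait list is assumed to mean "start immediately", and a zero
    // timeout is assumed to mean "no timeout after the fence"):
    //     int syncFenceFd = -1;
    //     int n = execution.computeFenced(/*wait_for=*/{},
    //                                     /*timeoutDurationAfterFence=*/0, &syncFenceFd);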

    int computeAsynchronously(sp<ExecutionCallback>* synchronizationCallback) {
        CHECK(synchronizationCallback != nullptr);
        return compute(synchronizationCallback);
    }
    int computeSynchronously() { return compute(nullptr); }
    int burstCompute(BurstBuilder* burst) { return compute(nullptr, burst); }
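
    // Typical synchronous flow (an illustrative sketch; the buffer names and
    // lengths are hypothetical, and a nullptr type means the operand type
    // declared in the model is used as-is):
    //     ExecutionBuilder execution(compilation);
    //     execution.setInput(0, /*type=*/nullptr, inputBuffer, inputLength);
    //     execution.setOutput(0, /*type=*/nullptr, outputBuffer, outputLength);
    //     int n = execution.computeSynchronously();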

    // Initialize output dimensional information from ModelArgumentInfo.
    std::vector<hal::OutputShape> getInitialOutputShapes() const;

    int getOutputOperandDimensions(uint32_t index, uint32_t* dimensions);
    int getOutputOperandRank(uint32_t index, uint32_t* rank);

    // Handshake with lower-level execution support
    bool measureTiming() const { return mMeasureTiming; }
    void reportTimingWithoutFencedExecutionCallback(hal::Timing timing) {
        mTimingWithoutFencedExecutionCallback = timing;
    }

    const CompilationBuilder* getCompilation() const { return mCompilation; }
    const ModelBuilder* getModel() const { return mModel; }
    const ModelBuilder* getSourceModel(uint32_t index) const;
    const hal::Operand& getSourceOperand(
            const std::pair<uint32_t, uint32_t>& sourceOperandIndex) const {
        return getSourceModel(sourceOperandIndex.first)->getOperand(sourceOperandIndex.second);
    }

    hal::ErrorStatus finishWithoutSyncFence(hal::ErrorStatus error,
                                            const std::vector<hal::OutputShape>& outputShapes);

    // Retrieve a reference to the IFencedExecutionCallback callback.
    const sp<hal::IFencedExecutionCallback>& getFencedExecutionCallback() {
        return mFencedExecutionCallback;
    }

    bool inFlight() const { return mStarted && !isFinished(); }

    const ModelArgumentInfo& getInputInfo(uint32_t index) const { return mInputs[index]; }
    const ModelArgumentInfo& getOutputInfo(uint32_t index) const { return mOutputs[index]; }

    std::optional<RunTimePoolInfo> getRunTimePoolInfo(uint32_t poolIndex) const {
        return mMemories[poolIndex]->getRunTimePoolInfo();
    }

   private:
    // If a callback is provided, then this is asynchronous. If a callback is
    // not provided (i.e., is nullptr), then this is synchronous.
    //
    // If burst is provided, then the burst path will be used. If a burst is not
    // provided (i.e., is nullptr), then a synchronous execution will occur.
    //
    // Providing both synchronizationCallback and burstBuilder is an error.
    int compute(sp<ExecutionCallback>* synchronizationCallback,
                BurstBuilder* burstBuilder = nullptr);
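
    // The three dispatch modes, spelled out (an illustrative restatement of
    // the rules above; `callback` and `burst` are hypothetical locals):
    //     compute(&callback);       // asynchronous
    //     compute(nullptr);         // synchronous
    //     compute(nullptr, burst);  // burst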

    const CompilationBuilder* mCompilation;

    // Update output dimensional information from OutputShape to ModelArgumentInfo.
    bool updateOutputShapes(const std::vector<hal::OutputShape>& outputShapes);

    bool updateMemories();

    bool hasSyncFence() const { return mSyncFenceFd > 0; }

    const ModelBuilder* mModel;
    const ExecutionPlan* mPlan;

    // This is a DeviceManager::kPartitioning* value captured from
    // CompilationBuilder when the ExecutionBuilder is constructed.
    uint32_t mPartitioning;

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store it
    //    instead in mInputBuffers or mOutputBuffers.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the m*Buffers entries.  Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;

    // Do we ask the driver to measure timing?
    bool mMeasureTiming = false;

    // Timing reported from the driver.  This field is only used if
    // mFencedExecutionCallback is nullptr.
    hal::Timing mTimingWithoutFencedExecutionCallback = {};

    // Amount of time to complete or abort the execution.
    std::optional<uint64_t> mTimeoutDuration;

    // Amount of time to complete or abort a loop.
    uint64_t mLoopTimeoutDuration = operation_while::kTimeoutNsDefault;

    // Properties cannot be set once the execution has started.
    std::atomic_bool mStarted = false;

    // Timing and output shapes can only be queried after the execution is
    // finished.  This field only becomes true if !hasSyncFence().
    // See isFinished().
    std::atomic_bool mFinishedWithoutSyncFence = false;

    bool isFinished() const;

    // With what error status has execution completed?  This field only takes on
    // a meaningful value if !hasSyncFence().
    // See completedWith().
    enum class Completion { NO_ERROR, OUTPUT_INSUFFICIENT_SIZE, OTHER_ERROR };
    Completion mCompletionWithoutSyncFence = Completion::OTHER_ERROR;

    // With what error status has execution completed?  Must only be called if
    // isFinished().
    Completion completedWith() const;

    // The sync fence fd that is created in the computeFenced call, if any.
    // (Sometimes no sync fence fd will be created.)
    int mSyncFenceFd = -1;

    // The callback used to query execution related info in the case of fenced
    // execution; otherwise, nullptr.  If the execution plan has multiple steps,
    // this is the callback associated with the last step.  If the last step
    // doesn't support fenced execution (e.g., the driver is too old), or if the
    // launch of execution on the driver fails, then this callback will be
    // nullptr.
    sp<hal::IFencedExecutionCallback> mFencedExecutionCallback;
};

// class StepExecutor is used to execute a single "step" in a
// potentially multiple-"step" execution process.  The graph associated
// with that step is executed in its entirety on a single device (or
// on the CPU).
class StepExecutor {
   public:
    // executionBuilder
    //     Describes the full (possibly multiple-"step") execution.
    // model
    //     The model to be executed by the executor.  Possibly a single
    //     "step" model of a multiple-"step" executionBuilder.
    // driver, preparedModel
    //     The device on which to execute the "step", and the prepared
    //     model to execute on that device.  (Both are nullptr in the
    //     case of CPU.)
    // step
    //     Contains the output index mapping from the excerpted "step" model to
    //     the main model if the execution has multiple "steps".  Must be
    //     nullptr otherwise.
    StepExecutor(ExecutionBuilder* executionBuilder, const ModelBuilder* model,
                 std::shared_ptr<Device> device, std::shared_ptr<PreparedModel> preparedModel,
                 const ExecutionStep* step = nullptr);
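
    // Typical single-"step" usage (an illustrative sketch; the locals are
    // hypothetical and error handling is omitted).  Passing nullptr for both
    // device and preparedModel selects the CPU path, per the comment above:
    //     StepExecutor executor(executionBuilder, model, /*device=*/nullptr,
    //                           /*preparedModel=*/nullptr);
    //     executor.mapInputsAndOutputsTrivially();
    //     auto [n, outputShapes, timing] = executor.compute(/*deadline=*/std::nullopt);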

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // in the case where we have a single-"step" execution (i.e., the executor
    // is executing the entire model from the ExecutionBuilder).
    void mapInputsAndOutputsTrivially();

    // Update output shapes with shapes returned from execution.
    bool updateOutputShapes(const std::vector<hal::OutputShape>& from,
                            std::vector<hal::OutputShape>* to);

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // one at a time.  Note that these are input/output indexes, not
    // operand indexes.
    void mapInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex], &mInputs[executorIndex]);
    }
    void mapOutput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex], &mOutputs[executorIndex]);
    }
    void mapOutputToInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex], &mInputs[executorIndex]);
    }
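
    // For example (illustrative): mapInput(2, 0) makes this executor's input 0
    // refer to the full execution's input 2, and mapOutputToInput(1, 3) feeds
    // the argument behind the full execution's output 1 in as this executor's
    // input 3.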

    // The input or output is assumed to have the size of the
    // corresponding operand.
    int setInputFromMemory(uint32_t inputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromMemory(mModel->getInputOperand(inputIndex), memory, offset,
                                          &mInputs.at(inputIndex));
    }
    int setOutputFromMemory(uint32_t outputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromMemory(mModel->getOutputOperand(outputIndex), memory, offset,
                                          &mOutputs.at(outputIndex));
    }

    // Executes using the (driver, preparedModel) specified at construction time.
    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> compute(
            const std::optional<Deadline>& deadline,
            const std::shared_ptr<ExecutionBurstController>& burstController = nullptr);

    // Re-compiles and executes using the CPU, regardless of the (driver,
    // preparedModel) specified at construction time.
    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> computeOnCpuFallback();

    bool isCpu() const;

    // Perform fenced execution and return error_code, sync_fence_fd and a
    // callback.
    std::tuple<int, int, sp<hal::IFencedExecutionCallback>> computeFenced(
            const std::vector<int>& wait_for, uint64_t timeoutDurationAfterFence,
            const std::optional<Deadline>& deadline);

   private:
    void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                          ModelArgumentInfo* executorInputOrOutput);

    int setInputOrOutputFromMemory(const hal::Operand& inputOrOutputOperand, const Memory* memory,
                                   uint32_t offset, ModelArgumentInfo* inputOrOutputInfo);

    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> computeWithMemories(
            const std::optional<Deadline>& deadline, const std::vector<const Memory*>& memories,
            const std::shared_ptr<ExecutionBurstController>& burstController = nullptr);

    // describes the full (possibly multiple-"step") execution
    ExecutionBuilder* mExecutionBuilder;

    // describes the single execution step
    const ExecutionStep* mExecutionStep = nullptr;

    // model to be executed on the executor, in both original and
    // compiled forms; and device on which to execute it
    const ModelBuilder* mModel;
    std::shared_ptr<Device> mDevice;
    std::shared_ptr<PreparedModel> mPreparedModel;

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store it
    //    instead in mInputBuffers or mOutputBuffers.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the m*Buffers entries.  Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};

}  // namespace nn
}  // namespace android

#endif  // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_EXECUTION_BUILDER_H