1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
// Class used to build a model through a succession of calls
// to the NN API.
19 
20 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MODEL_BUILDER_H
21 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MODEL_BUILDER_H
22 
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <vector>

#include "HalInterfaces.h"
#include "Memory.h"
#include "NeuralNetworks.h"
#include "Utils.h"
30 
31 namespace android {
32 namespace nn {
33 
34 class CompilationBuilder;
35 class Device;
36 class ExecutionPlan;
37 class Memory;
38 
39 class ModelBuilder {
40    public:
ModelBuilder()41     ModelBuilder() {}
42     // Returns an operand/operation type corresponding to a given extension operand/operation type.
43     int getExtensionType(const char* extensionName, uint16_t typeWithinExtension, int32_t* type);
44     // Adds an operand to the model.
45     int addOperand(const ANeuralNetworksOperandType& type);
46     int setOperandValue(uint32_t index, const void* buffer, size_t length);
47     int setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
48                                   size_t length);
49     int setOperandValueFromModel(uint32_t index, const ModelBuilder* value);
50     int setOperandSymmPerChannelQuantParams(
51             uint32_t index, const ANeuralNetworksSymmPerChannelQuantParams& extraParams);
52     int setOperandExtensionData(uint32_t index, const void* data, size_t length);
53 
54     int addOperation(ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t* inputs,
55                      uint32_t outputCount, const uint32_t* outputs);
56     int identifyInputsAndOutputs(uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
57                                  const uint32_t* outputs);
58     int relaxComputationFloat32toFloat16(bool allow);
isComputationFloat32RelaxedToFloat16()59     bool isComputationFloat32RelaxedToFloat16() const { return mRelaxComputationFloat32toFloat16; }
60 
61     int finish();
isFinished()62     bool isFinished() const { return mCompletedModel; }
isValid()63     bool isValid() const { return !mInvalidModel; }
64 
hasOEMOperation()65     bool hasOEMOperation() const { return mHasOEMOperation; }
hasExtensionOperation()66     bool hasExtensionOperation() const { return mHasExtensionOperation; }
67 
68     // explicitDeviceList is true if the list of devices was provided explicitly
69     // via the ANeuralNetworksModel_createForDevices API (which has certain
70     // special semantics) and false otherwise.
71     int createCompilation(CompilationBuilder** compilation,
72                           const std::vector<std::shared_ptr<Device>>& devices,
73                           bool explicitDeviceList = false);
74 
75     hal::Model makeHidlModel() const;
76 
operandCount()77     uint32_t operandCount() const {
78         // We don't allow more than uint32_t worth of operands
79         return static_cast<uint32_t>(mOperands.size());
80     }
operationCount()81     uint32_t operationCount() const {
82         // We don't allow more than uint32_t worth of operations
83         return static_cast<uint32_t>(mOperations.size());
84     }
inputCount()85     uint32_t inputCount() const { return static_cast<uint32_t>(mInputIndexes.size()); }
outputCount()86     uint32_t outputCount() const { return static_cast<uint32_t>(mOutputIndexes.size()); }
getInputOperandIndex(uint32_t i)87     uint32_t getInputOperandIndex(uint32_t i) const {
88         CHECK_LT(i, mInputIndexes.size());
89         return mInputIndexes[i];
90     }
getInputOperandIndexes()91     const std::vector<uint32_t>& getInputOperandIndexes() const { return mInputIndexes; }
getInputOperand(uint32_t i)92     const hal::Operand& getInputOperand(uint32_t i) const {
93         uint32_t index = getInputOperandIndex(i);
94         CHECK_LT(index, mOperands.size());
95         return mOperands[index];
96     }
getOutputOperandIndex(uint32_t i)97     uint32_t getOutputOperandIndex(uint32_t i) const {
98         CHECK_LT(i, mOutputIndexes.size());
99         return mOutputIndexes[i];
100     }
getOutputOperandIndexes()101     const std::vector<uint32_t>& getOutputOperandIndexes() const { return mOutputIndexes; }
getOutputOperand(uint32_t i)102     const hal::Operand& getOutputOperand(uint32_t i) const {
103         uint32_t index = getOutputOperandIndex(i);
104         CHECK_LT(index, mOperands.size());
105         return mOperands[index];
106     }
getOperand(uint32_t index)107     const hal::Operand& getOperand(uint32_t index) const { return mOperands[index]; }
getOperation(uint32_t index)108     const hal::Operation& getOperation(uint32_t index) const { return mOperations[index]; }
getMemories()109     const MemoryTracker& getMemories() const { return mMemories; }
getOperations()110     const std::vector<hal::Operation>& getOperations() const { return mOperations; }
getSortedOperationMapping()111     const std::vector<uint32_t>& getSortedOperationMapping() const {
112         return mSortedOperationIndexMap;
113     }
getPointerToOperandValue(uint32_t offset)114     const uint8_t* getPointerToOperandValue(uint32_t offset) const {
115         return mSmallOperandValues.data() + offset;
116     }
referencedModelCount()117     uint32_t referencedModelCount() const {
118         return static_cast<uint32_t>(mReferencedModels.size());
119     }
getReferencedModel(uint32_t i)120     const ModelBuilder* getReferencedModel(uint32_t i) const {
121         CHECK_LT(i, mReferencedModels.size());
122         return mReferencedModels[i];
123     }
getReferencedModel(const hal::Operand & operand)124     const ModelBuilder* getReferencedModel(const hal::Operand& operand) const {
125         CHECK(operand.lifetime == hal::OperandLifeTime::SUBGRAPH);
126         return getReferencedModel(operand.location.offset);
127     }
128 
129     int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices, uint32_t preference,
130                          uint32_t priority, const std::optional<Deadline>& deadline,
131                          ExecutionPlan* plan) const;
132 
133    private:
134     // TODO(b/132322449): move partitionTheWork, findBestDeviceForEachOperation,
135     // getPerformance, supportedByControlFlowInterpreter,
136     // isControlFlowOperationWithOperandOfUnknownSize, partitionTheWorkInternal,
137     // sortIntoRunOrder to CompilationBuilder?
138 
139     // Populates bestDeviceForOperation
140     //
141     // For 0 <= i < operationCount(), produces
142     //
143     //     0 <= (*bestDeviceForOperation)[i] <= devices.size()
144     //
145     // (*bestDeviceForOperation)[i] == devices.size() is a special value meaning
146     // that this is a control flow operation scheduled for interpreted execution
147     // (see LogicalStep).
148     int findBestDeviceForEachOperation(uint32_t preference,
149                                        const std::vector<std::shared_ptr<Device>>& devices,
150                                        std::vector<int>* bestDeviceForOperation) const;
151     float getPerformance(uint32_t preference, const std::shared_ptr<Device> device) const;
152     float getPerformance(uint32_t preference, const std::shared_ptr<Device> device,
153                          uint32_t operationIndex) const;
154     bool supportedByControlFlowInterpreter(uint32_t operationIndex) const;
155 
156     // Returns true if the operation is IF or WHILE and has an inner or outer
157     // input or output of unknown size.
158     bool isControlFlowOperationWithOperandOfUnknownSize(uint32_t operationIndex) const;
159 
160     int partitionTheWorkInternal(uint32_t sourceModelIndex,
161                                  const std::vector<std::shared_ptr<Device>>& devices,
162                                  uint32_t preference, uint32_t priority,
163                                  const std::optional<Deadline>& deadline,
164                                  ExecutionPlan* plan) const;
165 
166     // Return true if either mCompleteModel or mInvalidModel is true.
167     bool badState(const char* name);
168 
169     // Removes some trailing operation inputs that are set to default values.
170     //
171     // Some drivers reject operations based on the argument count even when the
172     // optional arguments are set to default values. This transformation enables
173     // more drivers to execute the model. See http://b/147105700.
174     void removeTrailingArgumentsWithDefaultValues();
175     uint32_t getNumTrailingArgumentsToRemove(const hal::Operation& operation) const;
176 
177     // Sorts the operations to be in the correct order for single threaded
178     // node-at-a-time execution.
179     bool sortIntoRunOrder();
180 
181     // Copies the large values to a shared memory, if we have any.
182     int copyLargeValuesToSharedMemory();
183 
184     // The operations of the graph.
185     std::vector<hal::Operation> mOperations;
186     // The mapping from sorted index to the original index of operations in mOperations.
187     // mSortedOperationIndexMap is empty before sortIntoRunOrder() is called.
188     std::vector<uint32_t> mSortedOperationIndexMap;
189     // Is at least one of those operations an OEM_OPERATION?
190     bool mHasOEMOperation = false;
191     // Is at least one of those operations an extension operation?
192     bool mHasExtensionOperation = false;
193     // The description of the operands of the graph.
194     std::vector<hal::Operand> mOperands;
195     // Is at least one of those operands an OEM operand?
196     bool mHasOEMOperand = false;
197     // The indexes of input operands of the model.
198     std::vector<uint32_t> mInputIndexes;
199     // The indexes of output operands of the model.
200     std::vector<uint32_t> mOutputIndexes;
201 
202     MemoryTracker mMemories;
203 
204     // The value of the small operands that are defined at model
205     // creation time.
206     std::vector<uint8_t> mSmallOperandValues;
207 
208     struct LargeValue {
209         uint32_t operandIndex;
210         const void* buffer;
211     };
212     // Operand index and buffer pointer for all the large operand values of this model.
213     std::vector<LargeValue> mLargeOperandValues;
214     // The shared memory region that will contain the large values.
215     std::unique_ptr<MemoryAshmem> mLargeValueMemory;
216 
217     // Once the model has been finished, we should not allow further
218     // modifications to the model.
219     bool mCompletedModel = false;
220 
221     // Any invalid manipulation of the model will mark the model invalid.
222     // No further modifications are allowed to the model.
223     bool mInvalidModel = false;
224 
225     // 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or
226     // precision as low as that of the IEEE 754 16-bit floating-point format.
227     // 'false' indicates TENSOR_FLOAT32 must be calculated using at least the
228     // range and precision of the IEEE 754 32-bit floating-point format.
229     bool mRelaxComputationFloat32toFloat16 = false;
230 
231     // Models referenced by operands in this model.
232     std::vector<const ModelBuilder*> mReferencedModels;
233 
234     class HidlModelMaker;
235 };
236 
237 }  // namespace nn
238 }  // namespace android
239 
240 #endif  // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MODEL_BUILDER_H
241