1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Class used to build a model through a succession of successive calls 18 // to the NN API. 19 20 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MODEL_BUILDER_H 21 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MODEL_BUILDER_H 22 23 #include <memory> 24 #include <vector> 25 26 #include "HalInterfaces.h" 27 #include "Memory.h" 28 #include "NeuralNetworks.h" 29 #include "Utils.h" 30 31 namespace android { 32 namespace nn { 33 34 class CompilationBuilder; 35 class Device; 36 class ExecutionPlan; 37 class Memory; 38 39 class ModelBuilder { 40 public: ModelBuilder()41 ModelBuilder() {} 42 // Returns an operand/operation type corresponding to a given extension operand/operation type. 43 int getExtensionType(const char* extensionName, uint16_t typeWithinExtension, int32_t* type); 44 // Adds an operand to the model. 45 int addOperand(const ANeuralNetworksOperandType& type); 46 int setOperandValue(uint32_t index, const void* buffer, size_t length); 47 int setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset, 48 size_t length); 49 int setOperandValueFromModel(uint32_t index, const ModelBuilder* value); 50 int setOperandSymmPerChannelQuantParams( 51 uint32_t index, const ANeuralNetworksSymmPerChannelQuantParams& extraParams); 52 int setOperandExtensionData(uint32_t index, const void* data, size_t length); 53 54 int addOperation(ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t* inputs, 55 uint32_t outputCount, const uint32_t* outputs); 56 int identifyInputsAndOutputs(uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, 57 const uint32_t* outputs); 58 int relaxComputationFloat32toFloat16(bool allow); isComputationFloat32RelaxedToFloat16()59 bool isComputationFloat32RelaxedToFloat16() const { return mRelaxComputationFloat32toFloat16; } 60 61 int finish(); isFinished()62 bool isFinished() const { return mCompletedModel; } isValid()63 bool isValid() const { return !mInvalidModel; } 64 hasOEMOperation()65 bool hasOEMOperation() const { return mHasOEMOperation; } hasExtensionOperation()66 bool hasExtensionOperation() const { return mHasExtensionOperation; } 67 68 // explicitDeviceList is true if the list of devices was provided explicitly 69 // via the ANeuralNetworksModel_createForDevices API (which has certain 70 // special semantics) and false otherwise. 71 int createCompilation(CompilationBuilder** compilation, 72 const std::vector<std::shared_ptr<Device>>& devices, 73 bool explicitDeviceList = false); 74 75 hal::Model makeHidlModel() const; 76 operandCount()77 uint32_t operandCount() const { 78 // We don't allow more than uint32_t worth of operands 79 return static_cast<uint32_t>(mOperands.size()); 80 } operationCount()81 uint32_t operationCount() const { 82 // We don't allow more than uint32_t worth of operations 83 return static_cast<uint32_t>(mOperations.size()); 84 } inputCount()85 uint32_t inputCount() const { return static_cast<uint32_t>(mInputIndexes.size()); } outputCount()86 uint32_t outputCount() const { return static_cast<uint32_t>(mOutputIndexes.size()); } getInputOperandIndex(uint32_t i)87 uint32_t getInputOperandIndex(uint32_t i) const { 88 CHECK_LT(i, mInputIndexes.size()); 89 return mInputIndexes[i]; 90 } getInputOperandIndexes()91 const std::vector<uint32_t>& getInputOperandIndexes() const { return mInputIndexes; } getInputOperand(uint32_t i)92 const hal::Operand& getInputOperand(uint32_t i) const { 93 uint32_t index = getInputOperandIndex(i); 94 CHECK_LT(index, mOperands.size()); 95 return mOperands[index]; 96 } getOutputOperandIndex(uint32_t i)97 uint32_t getOutputOperandIndex(uint32_t i) const { 98 CHECK_LT(i, mOutputIndexes.size()); 99 return mOutputIndexes[i]; 100 } getOutputOperandIndexes()101 const std::vector<uint32_t>& getOutputOperandIndexes() const { return mOutputIndexes; } getOutputOperand(uint32_t i)102 const hal::Operand& getOutputOperand(uint32_t i) const { 103 uint32_t index = getOutputOperandIndex(i); 104 CHECK_LT(index, mOperands.size()); 105 return mOperands[index]; 106 } getOperand(uint32_t index)107 const hal::Operand& getOperand(uint32_t index) const { return mOperands[index]; } getOperation(uint32_t index)108 const hal::Operation& getOperation(uint32_t index) const { return mOperations[index]; } getMemories()109 const MemoryTracker& getMemories() const { return mMemories; } getOperations()110 const std::vector<hal::Operation>& getOperations() const { return mOperations; } getSortedOperationMapping()111 const std::vector<uint32_t>& getSortedOperationMapping() const { 112 return mSortedOperationIndexMap; 113 } getPointerToOperandValue(uint32_t offset)114 const uint8_t* getPointerToOperandValue(uint32_t offset) const { 115 return mSmallOperandValues.data() + offset; 116 } referencedModelCount()117 uint32_t referencedModelCount() const { 118 return static_cast<uint32_t>(mReferencedModels.size()); 119 } getReferencedModel(uint32_t i)120 const ModelBuilder* getReferencedModel(uint32_t i) const { 121 CHECK_LT(i, mReferencedModels.size()); 122 return mReferencedModels[i]; 123 } getReferencedModel(const hal::Operand & operand)124 const ModelBuilder* getReferencedModel(const hal::Operand& operand) const { 125 CHECK(operand.lifetime == hal::OperandLifeTime::SUBGRAPH); 126 return getReferencedModel(operand.location.offset); 127 } 128 129 int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices, uint32_t preference, 130 uint32_t priority, const std::optional<Deadline>& deadline, 131 ExecutionPlan* plan) const; 132 133 private: 134 // TODO(b/132322449): move partitionTheWork, findBestDeviceForEachOperation, 135 // getPerformance, supportedByControlFlowInterpreter, 136 // isControlFlowOperationWithOperandOfUnknownSize, partitionTheWorkInternal, 137 // sortIntoRunOrder to CompilationBuilder? 138 139 // Populates bestDeviceForOperation 140 // 141 // For 0 <= i < operationCount(), produces 142 // 143 // 0 <= (*bestDeviceForOperation)[i] <= devices.size() 144 // 145 // (*bestDeviceForOperation)[i] == devices.size() is a special value meaning 146 // that this is a control flow operation scheduled for interpreted execution 147 // (see LogicalStep). 148 int findBestDeviceForEachOperation(uint32_t preference, 149 const std::vector<std::shared_ptr<Device>>& devices, 150 std::vector<int>* bestDeviceForOperation) const; 151 float getPerformance(uint32_t preference, const std::shared_ptr<Device> device) const; 152 float getPerformance(uint32_t preference, const std::shared_ptr<Device> device, 153 uint32_t operationIndex) const; 154 bool supportedByControlFlowInterpreter(uint32_t operationIndex) const; 155 156 // Returns true if the operation is IF or WHILE and has an inner or outer 157 // input or output of unknown size. 158 bool isControlFlowOperationWithOperandOfUnknownSize(uint32_t operationIndex) const; 159 160 int partitionTheWorkInternal(uint32_t sourceModelIndex, 161 const std::vector<std::shared_ptr<Device>>& devices, 162 uint32_t preference, uint32_t priority, 163 const std::optional<Deadline>& deadline, 164 ExecutionPlan* plan) const; 165 166 // Return true if either mCompleteModel or mInvalidModel is true. 167 bool badState(const char* name); 168 169 // Removes some trailing operation inputs that are set to default values. 170 // 171 // Some drivers reject operations based on the argument count even when the 172 // optional arguments are set to default values. This transformation enables 173 // more drivers to execute the model. See http://b/147105700. 174 void removeTrailingArgumentsWithDefaultValues(); 175 uint32_t getNumTrailingArgumentsToRemove(const hal::Operation& operation) const; 176 177 // Sorts the operations to be in the correct order for single threaded 178 // node-at-a-time execution. 179 bool sortIntoRunOrder(); 180 181 // Copies the large values to a shared memory, if we have any. 182 int copyLargeValuesToSharedMemory(); 183 184 // The operations of the graph. 185 std::vector<hal::Operation> mOperations; 186 // The mapping from sorted index to the original index of operations in mOperations. 187 // mSortedOperationIndexMap is empty before sortIntoRunOrder() is called. 188 std::vector<uint32_t> mSortedOperationIndexMap; 189 // Is at least one of those operations an OEM_OPERATION? 190 bool mHasOEMOperation = false; 191 // Is at least one of those operations an extension operation? 192 bool mHasExtensionOperation = false; 193 // The description of the operands of the graph. 194 std::vector<hal::Operand> mOperands; 195 // Is at least one of those operands an OEM operand? 196 bool mHasOEMOperand = false; 197 // The indexes of input operands of the model. 198 std::vector<uint32_t> mInputIndexes; 199 // The indexes of output operands of the model. 200 std::vector<uint32_t> mOutputIndexes; 201 202 MemoryTracker mMemories; 203 204 // The value of the small operands that are defined at model 205 // creation time. 206 std::vector<uint8_t> mSmallOperandValues; 207 208 struct LargeValue { 209 uint32_t operandIndex; 210 const void* buffer; 211 }; 212 // Operand index and buffer pointer for all the large operand values of this model. 213 std::vector<LargeValue> mLargeOperandValues; 214 // The shared memory region that will contain the large values. 215 std::unique_ptr<MemoryAshmem> mLargeValueMemory; 216 217 // Once the model has been finished, we should not allow further 218 // modifications to the model. 219 bool mCompletedModel = false; 220 221 // Any invalid manipulation of the model will mark the model invalid. 222 // No further modifications are allowed to the model. 223 bool mInvalidModel = false; 224 225 // 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or 226 // precision as low as that of the IEEE 754 16-bit floating-point format. 227 // 'false' indicates TENSOR_FLOAT32 must be calculated using at least the 228 // range and precision of the IEEE 754 32-bit floating-point format. 229 bool mRelaxComputationFloat32toFloat16 = false; 230 231 // Models referenced by operands in this model. 232 std::vector<const ModelBuilder*> mReferencedModels; 233 234 class HidlModelMaker; 235 }; 236 237 } // namespace nn 238 } // namespace android 239 240 #endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MODEL_BUILDER_H 241