1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Provides C++ classes to more easily use the Neural Networks API. 18 19 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H 20 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H 21 22 #include "NeuralNetworks.h" 23 24 #include <assert.h> 25 #include <math.h> 26 #include <algorithm> 27 #include <optional> 28 #include <string> 29 #include <utility> 30 #include <vector> 31 32 namespace android { 33 namespace nn { 34 namespace wrapper { 35 36 enum class Type { 37 FLOAT32 = ANEURALNETWORKS_FLOAT32, 38 INT32 = ANEURALNETWORKS_INT32, 39 UINT32 = ANEURALNETWORKS_UINT32, 40 TENSOR_FLOAT32 = ANEURALNETWORKS_TENSOR_FLOAT32, 41 TENSOR_INT32 = ANEURALNETWORKS_TENSOR_INT32, 42 TENSOR_QUANT8_ASYMM = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, 43 BOOL = ANEURALNETWORKS_BOOL, 44 TENSOR_QUANT16_SYMM = ANEURALNETWORKS_TENSOR_QUANT16_SYMM, 45 TENSOR_FLOAT16 = ANEURALNETWORKS_TENSOR_FLOAT16, 46 TENSOR_BOOL8 = ANEURALNETWORKS_TENSOR_BOOL8, 47 FLOAT16 = ANEURALNETWORKS_FLOAT16, 48 TENSOR_QUANT8_SYMM_PER_CHANNEL = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL, 49 TENSOR_QUANT16_ASYMM = ANEURALNETWORKS_TENSOR_QUANT16_ASYMM, 50 TENSOR_QUANT8_SYMM = ANEURALNETWORKS_TENSOR_QUANT8_SYMM, 51 MODEL = ANEURALNETWORKS_MODEL, 52 }; 53 54 enum class ExecutePreference { 55 PREFER_LOW_POWER = ANEURALNETWORKS_PREFER_LOW_POWER, 56 PREFER_FAST_SINGLE_ANSWER = ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER, 57 PREFER_SUSTAINED_SPEED = ANEURALNETWORKS_PREFER_SUSTAINED_SPEED 58 }; 59 60 enum class ExecutePriority { 61 LOW = ANEURALNETWORKS_PRIORITY_LOW, 62 MEDIUM = ANEURALNETWORKS_PRIORITY_MEDIUM, 63 HIGH = ANEURALNETWORKS_PRIORITY_HIGH, 64 DEFAULT = ANEURALNETWORKS_PRIORITY_DEFAULT, 65 }; 66 67 enum class Result { 68 NO_ERROR = ANEURALNETWORKS_NO_ERROR, 69 OUT_OF_MEMORY = ANEURALNETWORKS_OUT_OF_MEMORY, 70 INCOMPLETE = ANEURALNETWORKS_INCOMPLETE, 71 UNEXPECTED_NULL = ANEURALNETWORKS_UNEXPECTED_NULL, 72 BAD_DATA = ANEURALNETWORKS_BAD_DATA, 73 OP_FAILED = ANEURALNETWORKS_OP_FAILED, 74 UNMAPPABLE = ANEURALNETWORKS_UNMAPPABLE, 75 BAD_STATE = ANEURALNETWORKS_BAD_STATE, 76 OUTPUT_INSUFFICIENT_SIZE = ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE, 77 UNAVAILABLE_DEVICE = ANEURALNETWORKS_UNAVAILABLE_DEVICE, 78 MISSED_DEADLINE_TRANSIENT = ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT, 79 MISSED_DEADLINE_PERSISTENT = ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, 80 }; 81 82 struct SymmPerChannelQuantParams { 83 ANeuralNetworksSymmPerChannelQuantParams params; 84 std::vector<float> scales; 85 SymmPerChannelQuantParamsSymmPerChannelQuantParams86 SymmPerChannelQuantParams(std::vector<float> scalesVec, uint32_t channelDim) 87 : scales(std::move(scalesVec)) { 88 params = { 89 .channelDim = channelDim, 90 .scaleCount = static_cast<uint32_t>(scales.size()), 91 .scales = scales.size() > 0 ? scales.data() : nullptr, 92 }; 93 } 94 SymmPerChannelQuantParamsSymmPerChannelQuantParams95 SymmPerChannelQuantParams(const SymmPerChannelQuantParams& other) 96 : params(other.params), scales(other.scales) { 97 params.scales = scales.size() > 0 ? scales.data() : nullptr; 98 } 99 100 SymmPerChannelQuantParams& operator=(const SymmPerChannelQuantParams& other) { 101 if (this != &other) { 102 params = other.params; 103 scales = other.scales; 104 params.scales = scales.size() > 0 ? scales.data() : nullptr; 105 } 106 return *this; 107 } 108 }; 109 110 struct OperandType { 111 ANeuralNetworksOperandType operandType; 112 std::vector<uint32_t> dimensions; 113 std::optional<SymmPerChannelQuantParams> channelQuant; 114 OperandTypeOperandType115 OperandType(const OperandType& other) 116 : operandType(other.operandType), 117 dimensions(other.dimensions), 118 channelQuant(other.channelQuant) { 119 operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr; 120 } 121 122 OperandType& operator=(const OperandType& other) { 123 if (this != &other) { 124 operandType = other.operandType; 125 dimensions = other.dimensions; 126 channelQuant = other.channelQuant; 127 operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr; 128 } 129 return *this; 130 } 131 132 OperandType(Type type, std::vector<uint32_t> d, float scale = 0.0f, int32_t zeroPoint = 0) dimensionsOperandType133 : dimensions(std::move(d)), channelQuant(std::nullopt) { 134 operandType = { 135 .type = static_cast<int32_t>(type), 136 .dimensionCount = static_cast<uint32_t>(dimensions.size()), 137 .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr, 138 .scale = scale, 139 .zeroPoint = zeroPoint, 140 }; 141 } 142 OperandTypeOperandType143 OperandType(Type type, std::vector<uint32_t> data, SymmPerChannelQuantParams&& channelQuant) 144 : dimensions(std::move(data)), channelQuant(std::move(channelQuant)) { 145 assert(type == Type::TENSOR_QUANT8_SYMM_PER_CHANNEL); 146 147 operandType = { 148 .type = static_cast<int32_t>(type), 149 .dimensionCount = static_cast<uint32_t>(dimensions.size()), 150 .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr, 151 .scale = 0.0f, 152 .zeroPoint = 0, 153 }; 154 } 155 }; 156 157 class Memory { 158 public: Memory(size_t size,int protect,int fd,size_t offset)159 Memory(size_t size, int protect, int fd, size_t offset) { 160 mValid = ANeuralNetworksMemory_createFromFd(size, protect, fd, offset, &mMemory) == 161 ANEURALNETWORKS_NO_ERROR; 162 } 163 Memory(AHardwareBuffer * buffer)164 Memory(AHardwareBuffer* buffer) { 165 mValid = ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &mMemory) == 166 ANEURALNETWORKS_NO_ERROR; 167 } 168 ~Memory()169 ~Memory() { ANeuralNetworksMemory_free(mMemory); } 170 171 // Disallow copy semantics to ensure the runtime object can only be freed 172 // once. Copy semantics could be enabled if some sort of reference counting 173 // or deep-copy system for runtime objects is added later. 174 Memory(const Memory&) = delete; 175 Memory& operator=(const Memory&) = delete; 176 177 // Move semantics to remove access to the runtime object from the wrapper 178 // object that is being moved. This ensures the runtime object will be 179 // freed only once. Memory(Memory && other)180 Memory(Memory&& other) { *this = std::move(other); } 181 Memory& operator=(Memory&& other) { 182 if (this != &other) { 183 ANeuralNetworksMemory_free(mMemory); 184 mMemory = other.mMemory; 185 mValid = other.mValid; 186 other.mMemory = nullptr; 187 other.mValid = false; 188 } 189 return *this; 190 } 191 get()192 ANeuralNetworksMemory* get() const { return mMemory; } isValid()193 bool isValid() const { return mValid; } 194 195 private: 196 ANeuralNetworksMemory* mMemory = nullptr; 197 bool mValid = true; 198 }; 199 200 class Model { 201 public: Model()202 Model() { 203 // TODO handle the value returned by this call 204 ANeuralNetworksModel_create(&mModel); 205 } ~Model()206 ~Model() { ANeuralNetworksModel_free(mModel); } 207 208 // Disallow copy semantics to ensure the runtime object can only be freed 209 // once. Copy semantics could be enabled if some sort of reference counting 210 // or deep-copy system for runtime objects is added later. 211 Model(const Model&) = delete; 212 Model& operator=(const Model&) = delete; 213 214 // Move semantics to remove access to the runtime object from the wrapper 215 // object that is being moved. This ensures the runtime object will be 216 // freed only once. Model(Model && other)217 Model(Model&& other) { *this = std::move(other); } 218 Model& operator=(Model&& other) { 219 if (this != &other) { 220 ANeuralNetworksModel_free(mModel); 221 mModel = other.mModel; 222 mNextOperandId = other.mNextOperandId; 223 mValid = other.mValid; 224 other.mModel = nullptr; 225 other.mNextOperandId = 0; 226 other.mValid = false; 227 } 228 return *this; 229 } 230 finish()231 Result finish() { 232 if (mValid) { 233 auto result = static_cast<Result>(ANeuralNetworksModel_finish(mModel)); 234 if (result != Result::NO_ERROR) { 235 mValid = false; 236 } 237 return result; 238 } else { 239 return Result::BAD_STATE; 240 } 241 } 242 addOperand(const OperandType * type)243 uint32_t addOperand(const OperandType* type) { 244 if (ANeuralNetworksModel_addOperand(mModel, &(type->operandType)) != 245 ANEURALNETWORKS_NO_ERROR) { 246 mValid = false; 247 } 248 if (type->channelQuant) { 249 if (ANeuralNetworksModel_setOperandSymmPerChannelQuantParams( 250 mModel, mNextOperandId, &type->channelQuant.value().params) != 251 ANEURALNETWORKS_NO_ERROR) { 252 mValid = false; 253 } 254 } 255 return mNextOperandId++; 256 } 257 setOperandValue(uint32_t index,const void * buffer,size_t length)258 void setOperandValue(uint32_t index, const void* buffer, size_t length) { 259 if (ANeuralNetworksModel_setOperandValue(mModel, index, buffer, length) != 260 ANEURALNETWORKS_NO_ERROR) { 261 mValid = false; 262 } 263 } 264 setOperandValueFromMemory(uint32_t index,const Memory * memory,uint32_t offset,size_t length)265 void setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset, 266 size_t length) { 267 if (ANeuralNetworksModel_setOperandValueFromMemory(mModel, index, memory->get(), offset, 268 length) != ANEURALNETWORKS_NO_ERROR) { 269 mValid = false; 270 } 271 } 272 addOperation(ANeuralNetworksOperationType type,const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)273 void addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs, 274 const std::vector<uint32_t>& outputs) { 275 if (ANeuralNetworksModel_addOperation(mModel, type, static_cast<uint32_t>(inputs.size()), 276 inputs.data(), static_cast<uint32_t>(outputs.size()), 277 outputs.data()) != ANEURALNETWORKS_NO_ERROR) { 278 mValid = false; 279 } 280 } identifyInputsAndOutputs(const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)281 void identifyInputsAndOutputs(const std::vector<uint32_t>& inputs, 282 const std::vector<uint32_t>& outputs) { 283 if (ANeuralNetworksModel_identifyInputsAndOutputs( 284 mModel, static_cast<uint32_t>(inputs.size()), inputs.data(), 285 static_cast<uint32_t>(outputs.size()), 286 outputs.data()) != ANEURALNETWORKS_NO_ERROR) { 287 mValid = false; 288 } 289 } 290 relaxComputationFloat32toFloat16(bool isRelax)291 void relaxComputationFloat32toFloat16(bool isRelax) { 292 if (ANeuralNetworksModel_relaxComputationFloat32toFloat16(mModel, isRelax) == 293 ANEURALNETWORKS_NO_ERROR) { 294 mRelaxed = isRelax; 295 } 296 } 297 getHandle()298 ANeuralNetworksModel* getHandle() const { return mModel; } isValid()299 bool isValid() const { return mValid; } isRelaxed()300 bool isRelaxed() const { return mRelaxed; } 301 302 protected: 303 ANeuralNetworksModel* mModel = nullptr; 304 // We keep track of the operand ID as a convenience to the caller. 305 uint32_t mNextOperandId = 0; 306 bool mValid = true; 307 bool mRelaxed = false; 308 }; 309 310 class Event { 311 public: Event()312 Event() {} ~Event()313 ~Event() { ANeuralNetworksEvent_free(mEvent); } 314 315 // Disallow copy semantics to ensure the runtime object can only be freed 316 // once. Copy semantics could be enabled if some sort of reference counting 317 // or deep-copy system for runtime objects is added later. 318 Event(const Event&) = delete; 319 Event& operator=(const Event&) = delete; 320 321 // Move semantics to remove access to the runtime object from the wrapper 322 // object that is being moved. This ensures the runtime object will be 323 // freed only once. Event(Event && other)324 Event(Event&& other) { *this = std::move(other); } 325 Event& operator=(Event&& other) { 326 if (this != &other) { 327 ANeuralNetworksEvent_free(mEvent); 328 mEvent = other.mEvent; 329 other.mEvent = nullptr; 330 } 331 return *this; 332 } 333 wait()334 Result wait() { return static_cast<Result>(ANeuralNetworksEvent_wait(mEvent)); } 335 336 // Only for use by Execution set(ANeuralNetworksEvent * newEvent)337 void set(ANeuralNetworksEvent* newEvent) { 338 ANeuralNetworksEvent_free(mEvent); 339 mEvent = newEvent; 340 } 341 342 // Only for use by Execution getHandle()343 ANeuralNetworksEvent* getHandle() const { return mEvent; } 344 345 private: 346 ANeuralNetworksEvent* mEvent = nullptr; 347 }; 348 349 class Compilation { 350 public: Compilation(const Model * model)351 Compilation(const Model* model) { 352 int result = ANeuralNetworksCompilation_create(model->getHandle(), &mCompilation); 353 if (result != 0) { 354 // TODO Handle the error 355 } 356 } 357 ~Compilation()358 ~Compilation() { ANeuralNetworksCompilation_free(mCompilation); } 359 360 // Disallow copy semantics to ensure the runtime object can only be freed 361 // once. Copy semantics could be enabled if some sort of reference counting 362 // or deep-copy system for runtime objects is added later. 363 Compilation(const Compilation&) = delete; 364 Compilation& operator=(const Compilation&) = delete; 365 366 // Move semantics to remove access to the runtime object from the wrapper 367 // object that is being moved. This ensures the runtime object will be 368 // freed only once. Compilation(Compilation && other)369 Compilation(Compilation&& other) { *this = std::move(other); } 370 Compilation& operator=(Compilation&& other) { 371 if (this != &other) { 372 ANeuralNetworksCompilation_free(mCompilation); 373 mCompilation = other.mCompilation; 374 other.mCompilation = nullptr; 375 } 376 return *this; 377 } 378 setPreference(ExecutePreference preference)379 Result setPreference(ExecutePreference preference) { 380 return static_cast<Result>(ANeuralNetworksCompilation_setPreference( 381 mCompilation, static_cast<int32_t>(preference))); 382 } 383 setPriority(ExecutePriority priority)384 Result setPriority(ExecutePriority priority) { 385 return static_cast<Result>(ANeuralNetworksCompilation_setPriority( 386 mCompilation, static_cast<int32_t>(priority))); 387 } 388 setCaching(const std::string & cacheDir,const std::vector<uint8_t> & token)389 Result setCaching(const std::string& cacheDir, const std::vector<uint8_t>& token) { 390 if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) { 391 return Result::BAD_DATA; 392 } 393 return static_cast<Result>(ANeuralNetworksCompilation_setCaching( 394 mCompilation, cacheDir.c_str(), token.data())); 395 } 396 finish()397 Result finish() { return static_cast<Result>(ANeuralNetworksCompilation_finish(mCompilation)); } 398 getHandle()399 ANeuralNetworksCompilation* getHandle() const { return mCompilation; } 400 401 private: 402 ANeuralNetworksCompilation* mCompilation = nullptr; 403 }; 404 405 class Execution { 406 public: Execution(const Compilation * compilation)407 Execution(const Compilation* compilation) { 408 int result = ANeuralNetworksExecution_create(compilation->getHandle(), &mExecution); 409 if (result != 0) { 410 // TODO Handle the error 411 } 412 } 413 ~Execution()414 ~Execution() { ANeuralNetworksExecution_free(mExecution); } 415 416 // Disallow copy semantics to ensure the runtime object can only be freed 417 // once. Copy semantics could be enabled if some sort of reference counting 418 // or deep-copy system for runtime objects is added later. 419 Execution(const Execution&) = delete; 420 Execution& operator=(const Execution&) = delete; 421 422 // Move semantics to remove access to the runtime object from the wrapper 423 // object that is being moved. This ensures the runtime object will be 424 // freed only once. Execution(Execution && other)425 Execution(Execution&& other) { *this = std::move(other); } 426 Execution& operator=(Execution&& other) { 427 if (this != &other) { 428 ANeuralNetworksExecution_free(mExecution); 429 mExecution = other.mExecution; 430 other.mExecution = nullptr; 431 } 432 return *this; 433 } 434 435 Result setInput(uint32_t index, const void* buffer, size_t length, 436 const ANeuralNetworksOperandType* type = nullptr) { 437 return static_cast<Result>( 438 ANeuralNetworksExecution_setInput(mExecution, index, type, buffer, length)); 439 } 440 441 Result setInputFromMemory(uint32_t index, const Memory* memory, uint32_t offset, 442 uint32_t length, const ANeuralNetworksOperandType* type = nullptr) { 443 return static_cast<Result>(ANeuralNetworksExecution_setInputFromMemory( 444 mExecution, index, type, memory->get(), offset, length)); 445 } 446 447 Result setOutput(uint32_t index, void* buffer, size_t length, 448 const ANeuralNetworksOperandType* type = nullptr) { 449 return static_cast<Result>( 450 ANeuralNetworksExecution_setOutput(mExecution, index, type, buffer, length)); 451 } 452 453 Result setOutputFromMemory(uint32_t index, const Memory* memory, uint32_t offset, 454 uint32_t length, const ANeuralNetworksOperandType* type = nullptr) { 455 return static_cast<Result>(ANeuralNetworksExecution_setOutputFromMemory( 456 mExecution, index, type, memory->get(), offset, length)); 457 } 458 startCompute(Event * event)459 Result startCompute(Event* event) { 460 ANeuralNetworksEvent* ev = nullptr; 461 Result result = static_cast<Result>(ANeuralNetworksExecution_startCompute(mExecution, &ev)); 462 event->set(ev); 463 return result; 464 } 465 startComputeWithDependencies(const std::vector<const Event * > & dependencies,uint64_t duration,Event * event)466 Result startComputeWithDependencies(const std::vector<const Event*>& dependencies, 467 uint64_t duration, Event* event) { 468 std::vector<const ANeuralNetworksEvent*> deps(dependencies.size()); 469 std::transform(dependencies.begin(), dependencies.end(), deps.begin(), 470 [](const Event* e) { return e->getHandle(); }); 471 ANeuralNetworksEvent* ev = nullptr; 472 Result result = static_cast<Result>(ANeuralNetworksExecution_startComputeWithDependencies( 473 mExecution, deps.data(), deps.size(), duration, &ev)); 474 event->set(ev); 475 return result; 476 } 477 compute()478 Result compute() { return static_cast<Result>(ANeuralNetworksExecution_compute(mExecution)); } 479 getOutputOperandDimensions(uint32_t index,std::vector<uint32_t> * dimensions)480 Result getOutputOperandDimensions(uint32_t index, std::vector<uint32_t>* dimensions) { 481 uint32_t rank = 0; 482 Result result = static_cast<Result>( 483 ANeuralNetworksExecution_getOutputOperandRank(mExecution, index, &rank)); 484 dimensions->resize(rank); 485 if ((result != Result::NO_ERROR && result != Result::OUTPUT_INSUFFICIENT_SIZE) || 486 rank == 0) { 487 return result; 488 } 489 result = static_cast<Result>(ANeuralNetworksExecution_getOutputOperandDimensions( 490 mExecution, index, dimensions->data())); 491 return result; 492 } 493 494 private: 495 ANeuralNetworksExecution* mExecution = nullptr; 496 }; 497 498 } // namespace wrapper 499 } // namespace nn 500 } // namespace android 501 502 #endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H 503