/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_VERSIONED_INTERFACES_H
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_VERSIONED_INTERFACES_H

#include <android-base/macros.h>

#include <cstddef>
#include <functional>
#include <memory>
#include <optional>
#include <shared_mutex>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "Callbacks.h"
#include "HalInterfaces.h"
#include "Utils.h"

namespace android {
namespace nn {

// forward declarations
class ExecutionBurstController;
class IDeviceDeathHandler;
class IPreparedModelDeathHandler;
class MetaModel;
class VersionedIPreparedModel;

/**
 * Each class (VersionedIDevice, VersionedIPreparedModel) wraps a HIDL interface
 * of any version to abstract away version differences. It allows the remainder
 * of the runtime to always use the most up-to-date version of all HIDL types.
 * As such, any reference to a HIDL type in the rest of the runtime
 * will--by default--be the latest HIDL version.
 *
 * Each class will attempt to call the latest version of each interface method
 * if possible. If the latest method is unavailable, the versioned class
 * will attempt to upcast the type (e.g., V1_1::Model to V1_0::Model), and
 * invoke the latest interface method possible.
If the versioned class
 * fails to find a matching applicable function, it will return an error.
 */

/** This class wraps an IDevice object of any version. */
class VersionedIDevice {
    DISALLOW_IMPLICIT_CONSTRUCTORS(VersionedIDevice);

    // forward declaration of nested class
    class Core;

   public:
    /**
     * Create a VersionedIDevice object.
     *
     * Prefer using this function over the constructor, as it adds more
     * protections.
     *
     * @param serviceName The name of the service that provides "device".
     * @param makeDevice A device factory function that returns a device object
     *     that is at least version 1.0 of the IDevice interface.
     * @return A valid VersionedIDevice object, otherwise nullptr.
     */
    static std::shared_ptr<VersionedIDevice> create(std::string serviceName,
                                                    const hal::DeviceFactory& makeDevice);

    /**
     * Constructor for the VersionedIDevice object.
     *
     * VersionedIDevice will default to using the latest version of all IDevice
     * interface methods automatically.
     *
     * @param capabilities Performance capabilities of the driver.
     * @param supportedExtensions Extensions supported by the driver.
     * @param type The device type of the driver.
     * @param versionString The version string of the driver.
     * @param numberOfCacheFilesNeeded Number of model cache and data cache
     *     files needed by the driver.
     * @param serviceName The name of the service that provides core.getDevice<V1_0::IDevice>().
     * @param makeDevice A device factory function that returns a device object
     *     that is at least version 1.0 of the IDevice interface.
     * @param core An object that encapsulates a V1_0::IDevice, any appropriate downcasts to
     *     newer interfaces, and a hidl_death_recipient that will proactively handle
     *     the case when the service containing the IDevice object crashes.
     */
    VersionedIDevice(hal::Capabilities capabilities,
                     std::vector<hal::Extension> supportedExtensions, int32_t type,
                     std::string versionString,
                     std::pair<uint32_t, uint32_t> numberOfCacheFilesNeeded,
                     std::string serviceName, const hal::DeviceFactory& makeDevice, Core core);

    /**
     * Gets the capabilities of a driver.
     *
     * @return capabilities Capabilities of the driver.
     */
    const hal::Capabilities& getCapabilities() const;

    /**
     * Gets information about extensions supported by the driver implementation.
     *
     * Extensions of category ExtensionCategory::BASE must not appear
     * in the list.
     *
     * All extension operations and operands must be fully supported for the
     * extension to appear in the list of supported extensions.
     *
     * @return extensions A list of supported extensions.
     */
    const std::vector<hal::Extension>& getSupportedExtensions() const;

    /**
     * Gets the supported operations in a MetaModel.
     *
     * getSupportedOperations indicates which operations of
     * MetaModel::getModel() are fully supported by the vendor driver. If an
     * operation may not be supported for any reason, getSupportedOperations
     * must return false for that operation.
     *
     * @param metaModel A MetaModel whose operations--and their corresponding
     *     operands--are to be verified by the driver. When
     *     metaModel.getModel() is not compliant with the HAL
     *     version of the vendor driver, the MetaModel's slicing
     *     functionality (MetaModel::getSlice*()) is employed
     *     to query the vendor driver about which of the subset of
     *     compliant operations are supported. See the MetaModel
     *     class in MetaModel.h for more details.
     * @return status Error status of the call, must be:
     *     - NONE if successful
     *     - DEVICE_UNAVAILABLE if driver is offline or busy
     *     - GENERAL_FAILURE if there is an unspecified error
     *     - INVALID_ARGUMENT if provided model is invalid
     * @return supportedOperations A list of supported operations, where true
     *     indicates the operation is supported and false indicates the
     *     operation is not supported. The index of "supported" corresponds
     *     with the index of the operation it is describing.
     */
    std::pair<hal::ErrorStatus, hal::hidl_vec<bool>> getSupportedOperations(
            const MetaModel& metaModel) const;

    /**
     * Creates a prepared model for execution.
     *
     * prepareModel is used to make any necessary transformations or alternative
     * representations to a model for execution, possibly including
     * transformations on the constant data, optimization on the model's graph,
     * or compilation into the device's native binary format. The model itself
     * is not changed.
     *
     * Optionally, caching information may be provided for the driver to either:
     * - load the prepared model from cache, bypassing full model preparation
     * - save the prepared model to cache for faster model compilation time when
     *   the same model preparation is requested in the future
     *
     * The prepareModel function must verify the inputs to the prepareModel
     * function are correct. If there is an error, prepareModel must immediately
     * return the appropriate result code and nullptr for the
     * VersionedIPreparedModel. If the inputs to the prepareModel function are
     * valid and there is no error, prepareModel must prepare the model.
     *
     * If the model was prepared successfully, prepareModel must return
     * ANEURALNETWORKS_NO_ERROR and the produced VersionedIPreparedModel object.
     * If an error occurred preparing the model, prepareModel must return the
     * appropriate result code and nullptr for the VersionedIPreparedModel.
     *
     * The only information that may be unknown to the model at this stage is
     * the shape of the tensors, which may only be known at execution time. As
     * such, some driver services may return partially prepared models, where
     * the prepared model may only be finished when it is paired with a set of
     * inputs to the model. Note that the same prepared model object may be
     * used with different shapes of inputs on different (possibly concurrent)
     * executions.
     *
     * Multiple threads may call prepareModel on the same model concurrently.
     *
     * @param makeModel Factory function to create the model to be prepared for
     *     execution.
     * @param preference Indicates the intended execution behavior of a prepared
     *     model.
     * @param priority Priority of the prepared model relative to other prepared
     *     models owned by an application.
     * @param deadline Optional time point. If provided, prepareModel is
     *     expected to complete by this time point. If it is not able to be
     *     completed by the deadline, the execution may be aborted.
     * @param cacheDir String specifying the cache directory.
     * @param maybeToken An optional caching token of length
     *     Constant::BYTE_SIZE_OF_CACHE_TOKEN identifying the prepared model.
     *     The same token will be provided when retrieving the prepared model
     *     from the cache files with prepareModelFromCache. Tokens should be
     *     chosen to have a low rate of collision for a particular application.
     *     The driver cannot detect a collision; a collision will result in a
     *     failed execution or in a successful execution that produces incorrect
     *     output values. If both modelCache and dataCache are empty indicating
     *     that caching information is not provided, this token must be ignored.
     * @return A pair of:
     *     - Result code of preparing the model; must be:
     *         - ANEURALNETWORKS_NO_ERROR if preparation succeeded
     *         - ANEURALNETWORKS_UNAVAILABLE_DEVICE if driver is offline or busy
     *         - ANEURALNETWORKS_OP_FAILED if there is an unspecified error
     *         - ANEURALNETWORKS_BAD_DATA if one of the input arguments related
     *           to preparing the model is invalid
     *     - preparedModel A VersionedIPreparedModel object representing a model
     *       that has been prepared for execution, else nullptr.
     */
    std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModel(
            const hal::ModelFactory& makeModel, hal::ExecutionPreference preference, hal::Priority,
            const std::optional<Deadline>& deadline, const std::string& cacheDir,
            const std::optional<hal::CacheToken>& maybeToken) const;

    /**
     * Returns the feature level of a driver.
     *
     * @return featureLevel The API level of the most advanced feature this driver implements.
     *     For example, if the driver implements the features introduced in
     *     Android P, the value would be 28.
     *     Return -1 if the driver is offline or busy, or the query resulted in
     *     an unspecified error.
     */
    int64_t getFeatureLevel() const;

    /**
     * Returns the device type of a driver.
     *
     * @return deviceType The type of a given device, which can help application
     *     developers to distribute Machine Learning workloads and other
     *     workloads such as graphical rendering. E.g., for an app which renders
     *     AR scenes based on real time object detection results, the developer
     *     could choose an ACCELERATOR type device for ML workloads, and reserve
     *     GPU for graphical rendering.
     */
    int32_t getType() const;

    /**
     * Get the version string of the driver implementation.
     *
     * The version string must be a unique token among the set of version strings of
     * drivers of a specific device. The token identifies the device driver's
     * implementation. The token must not be confused with the feature level which is solely
     * defined by the interface version. This API is opaque to the Android framework, but the
     * Android framework may use the information for debugging or to pass on to NNAPI applications.
     *
     * Application developers sometimes have specific requirements to ensure good user experiences,
     * and they need more information to make intelligent decisions when the Android framework
     * cannot. For example, combined with the device name and other information, the token can help
     * NNAPI applications filter devices based on their needs:
     * - An application demands a certain level of performance, but a specific version of
     *   the driver cannot meet that requirement because of a performance regression.
     *   The application can disallow the driver based on the version provided.
     * - An application has a minimum precision requirement, but certain versions of
     *   the driver cannot meet that requirement because of bugs or certain optimizations.
     *   The application can filter out versions of these drivers.
     *
     * @return version The version string of the device implementation.
     */
    const std::string& getVersionString() const;

    /**
     * Gets the caching requirements of the driver implementation.
     *
     * There are two types of cache file descriptors provided to the driver: model cache
     * and data cache.
     *
     * The data cache is for caching constant data, possibly including preprocessed
     * and transformed tensor buffers. Any modification to the data cache should
     * have no worse effect than generating bad output values at execution time.
     *
     * The model cache is for caching security-sensitive data such as compiled
     * executable machine code in the device's native binary format. A modification
     * to the model cache may affect the driver's execution behavior, and a malicious
     * client could make use of this to execute beyond the granted permission. Thus,
     * the driver must always check whether the model cache is corrupted before
     * preparing the model from cache.
     *
     * getNumberOfCacheFilesNeeded returns how many of each type of cache files the driver
     * implementation needs to cache a single prepared model. Returning 0 for both types
     * indicates compilation caching is not supported by this driver. The driver may
     * still choose not to cache certain compiled models even if it reports that caching
     * is supported.
     *
     * If the device reports that caching is not supported, the user may avoid calling
     * IDevice::prepareModelFromCache or providing cache file descriptors to
     * IDevice::prepareModel_1_2.
     *
     * @return numModelCache An unsigned integer indicating how many files for model cache
     *     the driver needs to cache a single prepared model. It must
     *     be less than or equal to Constant::MAX_NUMBER_OF_CACHE_FILES.
     * @return numDataCache An unsigned integer indicating how many files for data cache
     *     the driver needs to cache a single prepared model. It must
     *     be less than or equal to Constant::MAX_NUMBER_OF_CACHE_FILES.
     */
    std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const;

    /**
     * Returns the name of the service.
     *
     * @return Name of the service.
     */
    const std::string& getName() const;

    /**
     * Allocates a driver-managed buffer with the properties specified by the descriptor as well as
     * the input and output roles of prepared models.
     *
     * The allocate function must verify the inputs to the allocate function are correct. If there
     * is an error, or if a certain role or property is not supported by the driver, the allocate
     * function must return with an appropriate ErrorStatus, a nullptr as the IBuffer, and 0 as the
     * buffer token. If the allocation is successful, this method must return with ErrorStatus::NONE
     * and the produced IBuffer with a positive token identifying the allocated buffer. A successful
     * allocation must accommodate all of the specified roles and buffer properties.
     *
     * The buffer is allocated as an uninitialized state. An uninitialized buffer may only be used
     * in ways that are specified by outputRoles. A buffer is initialized after it is used as an
     * output in a successful execution, or after a successful invocation of IBuffer::copyFrom on
     * the buffer. An initialized buffer may be used according to all roles specified in inputRoles
     * and outputRoles. A buffer will return to the uninitialized state if it is used as an output
     * in a failed execution, or after a failed invocation of IBuffer::copyFrom on the buffer.
     *
     * The driver may deduce the dimensions of the buffer according to the buffer descriptor as
     * well as the input and output roles. The dimensions or rank of the buffer may be unknown at
     * this stage. As such, some driver services may only create a placeholder and defer the actual
     * allocation until execution time. Note that the same buffer may be used for different shapes
     * of outputs on different executions. When the buffer is used as an input, the input shape
     * must be the same as the output shape from the last execution using this buffer as an output.
     *
     * The driver must apply proper validation upon every usage of the buffer, and fail the
     * execution immediately if the usage is illegal.
     *
     * @param desc A buffer descriptor specifying the properties of the buffer to allocate.
     * @param preparedModels A vector of IPreparedModel objects. Must only contain IPreparedModel
     *     objects from the same IDevice as this method invoked on.
     * @param inputRoles A vector of roles with each specifying an input to a prepared model.
     * @param outputRoles A vector of roles with each specifying an output to a prepared model.
     *     Each role specified in inputRoles and outputRoles must be unique. The corresponding
     *     model operands of the roles must have the same OperandType, scale, zero point, and
     *     ExtraParams. The dimensions of the operands and the dimensions specified in the buffer
     *     descriptor must be compatible with each other. Two dimensions are incompatible if there
     *     is at least one axis that is fully specified in both but has different values.
     * @return A tuple consisting of:
     *     - Error status of the buffer allocation. Must be:
     *         - NONE if successful
     *         - DEVICE_UNAVAILABLE if driver is offline or busy
     *         - GENERAL_FAILURE if a certain buffer property or a certain role is not supported,
     *           or if there is an unspecified error
     *         - INVALID_ARGUMENT if one of the input arguments is invalid
     *     - The allocated IBuffer object. If the buffer was unable to be allocated
     *       due to an error, nullptr must be returned.
     *     - A positive token identifying the allocated buffer. The same token will be
     *       provided when referencing the buffer as one of the memory pools in the request of an
     *       execution. If the buffer was unable to be allocated due to an error, the token must be
     *       0.
     */
    std::tuple<hal::ErrorStatus, sp<hal::IBuffer>, uint32_t> allocate(
            const hal::BufferDesc& desc,
            const std::vector<std::shared_ptr<VersionedIPreparedModel>>& preparedModels,
            const hal::hidl_vec<hal::BufferRole>& inputRoles,
            const hal::hidl_vec<hal::BufferRole>& outputRoles) const;

    /**
     * Blocks until the device is not in a bad state.
     *
     * @return Error code after waiting. ANEURALNETWORKS_NO_ERROR if device is
     *     not in a bad state.
     */
    int wait() const;

   private:
    // Cached initialization results.
    const hal::Capabilities kCapabilities;
    const std::vector<hal::Extension> kSupportedExtensions;
    const int32_t kType;
    const std::string kVersionString;
    const std::pair<uint32_t, uint32_t> kNumberOfCacheFilesNeeded;

    // internal methods to prepare a model
    std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModelInternal(
            const hal::Model& model, hal::ExecutionPreference preference, hal::Priority priority,
            const std::optional<Deadline>& deadline, const std::string& cacheDir,
            const std::optional<hal::CacheToken>& maybeToken) const;
    std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModelFromCacheInternal(
            const std::optional<Deadline>& deadline, const std::string& cacheDir,
            const hal::CacheToken& token) const;

    /**
     * This is a utility class for VersionedIDevice that encapsulates a
     * V1_0::IDevice, any appropriate downcasts to newer interfaces, and a
     * hidl_death_recipient that will proactively handle the case when the
     * service containing the IDevice object crashes.
     *
     * This is a convenience class to help VersionedIDevice recover from an
     * IDevice object crash: It bundles together all the data that needs to
     * change when recovering from a crash, and simplifies the process of
     * instantiating that data (at VersionedIDevice creation time) and
     * re-instantiating that data (at crash recovery time).
     */
    class Core {
       public:
        /**
         * Constructor for the Core object.
         *
         * Core is constructed with a V1_0::IDevice object, which represents a
         * device that is at least v1.0 of the interface. The constructor
         * downcasts to the latest version of the IDevice interface, allowing
         * VersionedIDevice to default to using the latest version of all
         * IDevice interface methods automatically.
         *
         * @param device A device object that is at least version 1.0 of the IDevice
         *     interface.
         * @param deathHandler A hidl_death_recipient that will proactively handle
         *     the case when the service containing the IDevice
         *     object crashes.
         */
        Core(sp<hal::V1_0::IDevice> device, sp<IDeviceDeathHandler> deathHandler);

        /**
         * Destructor for the Core object.
         *
         * This destructor unlinksToDeath this object's hidl_death_recipient as it
         * no longer needs to handle the case where the IDevice's service crashes.
         */
        ~Core();

        // Support move but not copy
        Core(Core&&) noexcept;
        Core& operator=(Core&&) noexcept;
        Core(const Core&) = delete;
        Core& operator=(const Core&) = delete;

        /**
         * Create a Core object.
         *
         * Prefer using this function over the constructor, as it adds more
         * protections.
         *
         * This call linksToDeath a hidl_death_recipient that can
         * proactively handle the case when the service containing the IDevice
         * object crashes.
         *
         * @param device A device object that is at least version 1.0 of the IDevice
         *     interface.
         * @return A valid Core object, otherwise nullopt.
         */
        static std::optional<Core> create(sp<hal::V1_0::IDevice> device);

        /**
         * Returns sp<*::IDevice> that is a downcast of the sp<V1_0::IDevice>
         * passed to the constructor. This will be nullptr if that IDevice is
         * not actually of the specified downcast type.
         */
        template <typename T_IDevice>
        sp<T_IDevice> getDevice() const;
        template <>
        sp<hal::V1_0::IDevice> getDevice() const {
            return mDeviceV1_0;
        }
        template <>
        sp<hal::V1_1::IDevice> getDevice() const {
            return mDeviceV1_1;
        }
        template <>
        sp<hal::V1_2::IDevice> getDevice() const {
            return mDeviceV1_2;
        }
        template <>
        sp<hal::V1_3::IDevice> getDevice() const {
            return mDeviceV1_3;
        }

        /**
         * Returns sp<*::IDevice> (as per getDevice()) and the
         * hidl_death_recipient that will proactively handle the case when the
         * service containing the IDevice object crashes.
         */
        template <typename T_IDevice>
        std::pair<sp<T_IDevice>, sp<IDeviceDeathHandler>> getDeviceAndDeathHandler() const;

       private:
        /**
         * All versions of IDevice are necessary because the driver could be v1.0,
         * v1.1, or a later version. All these pointers logically represent the same
         * object.
         *
         * The general strategy is: HIDL returns a V1_0 device object, which
         * (if not nullptr) could be v1.0, v1.1, or a greater version. The V1_0
         * object is then "dynamically cast" to a V1_1 object. If successful,
         * mDeviceV1_1 will point to the same object as mDeviceV1_0; otherwise,
         * mDeviceV1_1 will be nullptr.
         *
         * In general:
         * * If the device is truly v1.0, mDeviceV1_0 will point to a valid object
         *   and mDeviceV1_1 will be nullptr.
         * * If the device is truly v1.1 or later, both mDeviceV1_0 and mDeviceV1_1
         *   will point to the same valid object.
         *
         * Idiomatic usage: if mDeviceV1_1 is non-null, do V1_1 dispatch; otherwise,
         * do V1_0 dispatch.
         */
        sp<hal::V1_0::IDevice> mDeviceV1_0;
        sp<hal::V1_1::IDevice> mDeviceV1_1;
        sp<hal::V1_2::IDevice> mDeviceV1_2;
        sp<hal::V1_3::IDevice> mDeviceV1_3;

        /**
         * HIDL callback to be invoked if the service for mDeviceV1_0 crashes.
         *
         * nullptr if this Core instance is a move victim and hence has no
         * callback to be unlinked.
         */
        sp<IDeviceDeathHandler> mDeathHandler;
    };

    // This method retrieves the appropriate mCore.mDevice* field, under a read lock.
    template <typename T_IDevice>
    sp<T_IDevice> getDevice() const EXCLUDES(mMutex) {
        std::shared_lock lock(mMutex);
        return mCore.getDevice<T_IDevice>();
    }

    // This method retrieves the appropriate mCore.mDevice* fields, under a read lock.
    template <typename T_IDevice>
    auto getDeviceAndDeathHandler() const EXCLUDES(mMutex) {
        std::shared_lock lock(mMutex);
        return mCore.getDeviceAndDeathHandler<T_IDevice>();
    }

    // This method calls the function fn in a manner that supports recovering
    // from a driver crash: If the driver implementation is dead because the
    // driver crashed either before the call to fn or during the call to fn, we
    // will attempt to obtain a new instance of the same driver and call fn
    // again.
    //
    // If a callback is provided, this method protects it against driver death
    // and waits for it (callback->wait()).
    template <typename T_Return, typename T_IDevice, typename T_Callback = std::nullptr_t>
    hal::Return<T_Return> recoverable(
            const char* context,
            const std::function<hal::Return<T_Return>(const sp<T_IDevice>&)>& fn,
            const T_Callback& callback = nullptr) const EXCLUDES(mMutex);

    // The name of the service that implements the driver.
    const std::string kServiceName;

    // Factory function object to generate an IDevice object.
    const hal::DeviceFactory kMakeDevice;

    // Guards access to mCore.
    mutable std::shared_mutex mMutex;

    // Data that can be rewritten during driver recovery. Guarded against
    // simultaneous access by a mutex: Any number of concurrent read accesses is
    // permitted, but a write access excludes all other accesses.
    mutable Core mCore GUARDED_BY(mMutex);
};

/** This class wraps an IPreparedModel object of any version. */
class VersionedIPreparedModel {
    DISALLOW_IMPLICIT_CONSTRUCTORS(VersionedIPreparedModel);

   public:
    /**
     * Constructor for the VersionedIPreparedModel object.
     *
     * This constructor should not be used directly. Instead,
     * VersionedIPreparedModel should be created via
     * VersionedIDevice::prepareModel*.
     *
     * VersionedIPreparedModel is constructed with the V1_0::IPreparedModel object, which
     * represents a device that is at least v1.0 of the interface. The constructor downcasts
     * to the latest version of the IPreparedModel interface, and will default to using the
     * latest version of all IPreparedModel interface methods automatically.
     *
     * @param preparedModel A prepared model object that is at least version 1.0 of the
     *     IPreparedModel interface.
     * @param deathHandler A hidl_death_recipient that will proactively handle
     *     the case when the service containing the IPreparedModel
     *     object crashes.
     */
    VersionedIPreparedModel(sp<hal::V1_0::IPreparedModel> preparedModel,
                            sp<IPreparedModelDeathHandler> deathHandler);

    /**
     * Destructor for the VersionedIPreparedModel object.
     *
     * This destructor unlinksToDeath this object's hidl_death_recipient as it
     * no longer needs to handle the case where the IPreparedModel's service
     * crashes.
     */
    ~VersionedIPreparedModel();

    /**
     * Performs a synchronous execution on a prepared model.
     *
     * The execution is performed synchronously with respect to the caller.
     * VersionedIPreparedModel::execute must verify the inputs to the function
     * are correct.
     * If there is an error, VersionedIPreparedModel::execute must
     * immediately return with the appropriate result code. If the inputs to the
     * function are valid and there is no error,
     * VersionedIPreparedModel::execute must perform the execution, and must not
     * return until the execution is complete.
     *
     * If the prepared model was prepared from a model wherein all tensor
     * operands have fully specified dimensions, and the inputs to the function
     * are valid, and at execution time every operation's input operands have
     * legal values, then the execution should complete successfully
     * (ANEURALNETWORKS_NO_ERROR): There must be no failure unless the device
     * itself is in a bad state.
     *
     * execute may be called with an optional deadline. If the execution is not
     * able to be completed before the provided deadline, the execution may be
     * aborted, and either {@link ErrorStatus::MISSED_DEADLINE_TRANSIENT} or
     * {@link ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The
     * error due to an abort must be sent the same way as other errors,
     * described above.
     *
     * Any number of calls to the VersionedIPreparedModel::execute function, in
     * any combination, may be made concurrently, even on the same
     * VersionedIPreparedModel object.
     *
     * @param request The input and output information on which the prepared
     *                model is to be executed.
     * @param measure Specifies whether or not to measure duration of the
     *                execution.
     * @param deadline Optional time point. If provided, the execution is
     *     expected to complete by this time point. If it is not able to be
     *     completed by the deadline, the execution may be aborted.
     * @param loopTimeoutDuration The maximum amount of time that should be spent
     *     executing a {@link OperationType::WHILE} operation. If a loop
     *     condition model does not output false within this duration, the
     *     execution must be aborted. If no loop timeout duration is provided,
     *     the maximum amount of time is {@link LoopTimeoutDurationNs::DEFAULT}.
     *     When provided, the duration must not exceed {@link
     *     LoopTimeoutDurationNs::MAXIMUM}.
     * @param preferSynchronous 'true' to perform synchronous HAL execution when
     *                          possible, 'false' to force asynchronous HAL
     *                          execution.
     * @return A tuple consisting of:
     *         - Result code of the execution, must be:
     *             - ANEURALNETWORKS_NO_ERROR if execution is performed
     *               successfully
     *             - ANEURALNETWORKS_UNAVAILABLE_DEVICE if driver is offline or
     *               busy
     *             - ANEURALNETWORKS_OP_FAILED if there is an unspecified error
     *             - ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE if at least one
     *               output operand buffer is not large enough to store the
     *               corresponding output
     *             - ANEURALNETWORKS_BAD_DATA if one of the input arguments is
     *               invalid
     *         - A list of shape information of model output operands.
     *           The index into "outputShapes" corresponds to the index of the
     *           output operand in the Request outputs vector. outputShapes must
     *           be empty unless the result code is either
     *           ANEURALNETWORKS_NO_ERROR or
     *           ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE. outputShapes may be
     *           empty if the result code is ANEURALNETWORKS_NO_ERROR and all
     *           model output operands are fully-specified at execution time.
     *           outputShapes must have the same number of elements as the number
     *           of model output operands if the result code is
     *           ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE, or if the result code
     *           is ANEURALNETWORKS_NO_ERROR and the model has at least one
     *           output operand that is not fully-specified.
     *         - Duration of execution. Unless measure is YES and result code is
     *           ANEURALNETWORKS_NO_ERROR, all times must be reported as
     *           UINT64_MAX. A driver may choose to report any time as
     *           UINT64_MAX, indicating that measurement is not available.
     */
    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> execute(
            const hal::Request& request, hal::MeasureTiming measure,
            const std::optional<Deadline>& deadline,
            const hal::OptionalTimeoutDuration& loopTimeoutDuration, bool preferSynchronous) const;

    /**
     * Creates a burst controller on a prepared model.
     *
     * @param preferPowerOverLatency 'true' if the Burst object should run in a
     *                               more power efficient mode, 'false' if more
     *                               power can be used to possibly reduce
     *                               burst compute latency.
     * @return ExecutionBurstController Execution burst controller object.
     *                                  nullptr is returned if the burst cannot
     *                                  be configured for any reason.
     */
    std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
            bool preferPowerOverLatency) const;

    /**
     * Launch a fenced asynchronous execution on a prepared model.
     *
     * The execution is performed asynchronously with respect to the caller.
     * executeFenced must fully validate the request. If there is an error during validation,
     * executeFenced must immediately return with the corresponding ErrorStatus. If the inputs
     * to the function are valid and there is no error launching,
     * executeFenced must dispatch an asynchronous task to perform the execution in the
     * background, and immediately return with ErrorStatus::NONE, a sync fence that will be
     * signaled once the execution is completed, and a callback that can be used by the client
     * to query the duration and runtime error status. If the task has finished
     * before the call returns, an empty handle may be returned for the syncFence. If the
     * asynchronous task fails to launch, executeFenced must immediately return with
     * ErrorStatus::GENERAL_FAILURE, an empty handle for the syncFence, and nullptr
     * for callback. The execution must wait for all the sync fences (if any) in waitFor to be
     * signaled before starting the actual execution.
     *
     * If any of the sync fences in waitFor changes to error status after the executeFenced
     * call succeeds, the driver must immediately set the returned syncFence to error status.
     *
     * When the asynchronous task has finished its execution, it must
     * immediately signal the syncFence returned from the executeFenced call. After
     * the syncFence is signaled, the task must not modify the content of
     * any data object referenced by 'request' (described by the
     * {@link @1.0::DataLocation} of a {@link @1.0::RequestArgument}).
     *
     * executeFenced may be called with an optional deadline and an optional
     * timeoutDurationAfterFence. If the execution is not able to be completed
     * before the provided deadline or within the timeoutDurationAfterFence,
     * whichever comes earlier, the execution may be aborted, and either {@link
     * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
     * ErrorStatus::MISSED_DEADLINE_PERSISTENT} may be returned. The error due
     * to an abort must be sent the same way as other errors, described above.
     *
     * Any number of calls to the executeFenced, execute* and executeSynchronously*
     * functions, in any combination, may be made concurrently, even on the same
     * IPreparedModel object.
     *
     * @param request The input and output information on which the prepared
     *                model is to be executed.
     * @param waitFor A vector of sync fence file descriptors. The execution
     *                must wait for all sync fences to be signaled before
     *                starting the task.
     * @param measure Specifies whether or not to measure duration of the
     *                execution.
     * @param deadline The time by which execution is expected to complete. If
     *                 the execution cannot be finished by the deadline, the
     *                 execution may be aborted.
     * @param loopTimeoutDuration The maximum amount of time that should be spent
     *     executing a {@link OperationType::WHILE} operation. If a loop
     *     condition model does not output false within this duration, the
     *     execution must be aborted. If no loop timeout duration is provided,
     *     the maximum amount of time is {@link LoopTimeoutDurationNs::DEFAULT}.
     *     When provided, the duration must not exceed {@link
     *     LoopTimeoutDurationNs::MAXIMUM}.
     * @param timeoutDurationAfterFence The timeout duration within which the
     *                                  execution is expected to complete after
     *                                  all sync fences in waitFor are signaled.
     * @return A tuple consisting of:
     *         - Error code of the dispatch call.
     *         - A sync_fence that will be triggered when the task is completed.
     *           The sync_fence will be set to error if a critical error occurs
     *           when doing actual evaluation.
     *         - A callback that can be used to query information like duration
     *           and detailed runtime error status when the task is completed.
     *         - Optional timing information. Only useful if the call is
     *           simulated using sync execution. Either the
     *           IFencedExecutionCallback or the optional timing information is
     *           returned, but not both.
     */
    std::tuple<int, hal::hidl_handle, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
            const hal::Request& request, const hal::hidl_vec<hal::hidl_handle>& waitFor,
            hal::MeasureTiming measure, const std::optional<Deadline>& deadline,
            const hal::OptionalTimeoutDuration& loopTimeoutDuration,
            const hal::OptionalTimeoutDuration& timeoutDurationAfterFence);

   private:
    friend class VersionedIDevice;

    // Dispatches an execution over the asynchronous (callback-based) HAL path.
    // Presumably selected by execute() when preferSynchronous is false or the
    // synchronous path is unavailable -- the dispatch policy lives in the .cpp.
    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeAsynchronously(
            const hal::Request& request, hal::MeasureTiming timing,
            const std::optional<Deadline>& deadline,
            const hal::OptionalTimeoutDuration& loopTimeoutDuration) const;
    // Dispatches an execution over the synchronous HAL path; counterpart of
    // executeAsynchronously() above.
    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeSynchronously(
            const hal::Request& request, hal::MeasureTiming measure,
            const std::optional<Deadline>& deadline,
            const hal::OptionalTimeoutDuration& loopTimeoutDuration) const;

    /**
     * Returns sp<V1_3::IPreparedModel> that is a downcast of the sp<V1_0::IPreparedModel>
     * passed to the constructor. This will be nullptr if that IPreparedModel is
     * not actually of the specified downcast type.
     */
    sp<hal::V1_3::IPreparedModel> getV1_3() const { return mPreparedModelV1_3; }

    /**
     * All versions of IPreparedModel are necessary because the preparedModel could be v1.0,
     * v1.2, or a later version. All these pointers logically represent the same object.
     *
     * The general strategy is: HIDL returns a V1_0 prepared model object, which
     * (if not nullptr) could be v1.0, v1.2, or a greater version. The V1_0
     * object is then "dynamically cast" to objects of later versions. If successful,
     * mPreparedModel* will point to the same object as mPreparedModelV1_0; otherwise,
     * mPreparedModel* will be nullptr.
     *
     * In general:
     * * If the prepared model is truly v1.0, mPreparedModelV1_0 will point to a valid object,
     *   and both mPreparedModelV1_2 and mPreparedModelV1_3 will be nullptr.
     * * If the prepared model is truly v1.2, both mPreparedModelV1_0 and mPreparedModelV1_2
     *   will point to the same valid object, but mPreparedModelV1_3 will be nullptr.
     * * If the prepared model is truly v1.3 or later, all of mPreparedModelV1_0,
     *   mPreparedModelV1_2, and mPreparedModelV1_3 will point to the same valid object.
     *
     * Idiomatic usage: if mPreparedModelV1_3 is non-null, do V1_3 dispatch;
     * otherwise, if mPreparedModelV1_2 is non-null, do V1_2 dispatch;
     * otherwise, do V1_0 dispatch.
     */
    sp<hal::V1_0::IPreparedModel> mPreparedModelV1_0;
    sp<hal::V1_2::IPreparedModel> mPreparedModelV1_2;
    sp<hal::V1_3::IPreparedModel> mPreparedModelV1_3;

    /**
     * HIDL callback to be invoked if the service for mPreparedModelV1_0 crashes.
     */
    const sp<IPreparedModelDeathHandler> mDeathHandler;
};

}  // namespace nn
}  // namespace android

#endif  // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_VERSIONED_INTERFACES_H