1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_VERSIONED_INTERFACES_H
18 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_VERSIONED_INTERFACES_H
19 
20 #include <android-base/macros.h>
21 
22 #include <cstddef>
23 #include <functional>
24 #include <memory>
25 #include <optional>
26 #include <shared_mutex>
27 #include <string>
28 #include <tuple>
29 #include <utility>
30 #include <vector>
31 
32 #include "Callbacks.h"
33 #include "HalInterfaces.h"
34 #include "Utils.h"
35 
36 namespace android {
37 namespace nn {
38 
39 // forward declarations
40 class ExecutionBurstController;
41 class IDeviceDeathHandler;
42 class IPreparedModelDeathHandler;
43 class MetaModel;
44 class VersionedIPreparedModel;
45 
46 /**
47  * Each class (VersionedIDevice, VersionedIPreparedModel) wraps a HIDL interface
48  * of any version to abstract away version differences. It allows the remainder
49  * of the runtime to always use the most up-to-date version of all HIDL types.
50  * As such, any reference to a HIDL type in the rest of the runtime
51  * will--by default--be the latest HIDL version.
52  *
53  * Each class will attempt to call the latest version of each interface method
54  * if possible. If the latest method is unavailable, the versioned class
55  * will attempt to upcast the type (e.g., V1_1::Model to V1_0::Model), and
56  * invoke the latest interface method possible. If the versioned class
57  * fails to find a matching applicable function, it will return an error.
58  */
59 
60 /** This class wraps an IDevice object of any version. */
61 class VersionedIDevice {
62     DISALLOW_IMPLICIT_CONSTRUCTORS(VersionedIDevice);
63 
64     // forward declaration of nested class
65     class Core;
66 
67    public:
68     /**
69      * Create a VersionedIDevice object.
70      *
71      * Prefer using this function over the constructor, as it adds more
72      * protections.
73      *
74      * @param serviceName The name of the service that provides "device".
75      * @param makeDevice A device factory function that returns a device object
76      *                   that is at least version 1.0 of the IDevice interface.
77      * @return A valid VersionedIDevice object, otherwise nullptr.
78      */
79     static std::shared_ptr<VersionedIDevice> create(std::string serviceName,
80                                                     const hal::DeviceFactory& makeDevice);
81 
82     /**
83      * Constructor for the VersionedIDevice object.
84      *
85      * VersionedIDevice will default to using the latest version of all IDevice
86      * interface methods automatically.
87      *
88      * @param capabilities Performance capabilities of the driver.
89      * @param supportedExtensions Extensions supported by the driver.
90      * @param type The device type of the driver.
91      * @param versionString The version string of the driver.
92      * @param numberOfCacheFilesNeeded Number of model cache and data cache
93      *     files needed by the driver.
94      * @param serviceName The name of the service that provides core.getDevice<V1_0::IDevice>().
95      * @param makeDevice A device factory function that returns a device object
96      *                   that is at least version 1.0 of the IDevice interface.
97      * @param core An object that encapsulates a V1_0::IDevice, any appropriate downcasts to
98      *             newer interfaces, and a hidl_death_recipient that will proactively handle
99      *             the case when the service containing the IDevice object crashes.
100      */
101     VersionedIDevice(hal::Capabilities capabilities,
102                      std::vector<hal::Extension> supportedExtensions, int32_t type,
103                      std::string versionString,
104                      std::pair<uint32_t, uint32_t> numberOfCacheFilesNeeded,
105                      std::string serviceName, const hal::DeviceFactory& makeDevice, Core core);
106 
107     /**
108      * Gets the capabilities of a driver.
109      *
110      * @return capabilities Capabilities of the driver.
111      */
112     const hal::Capabilities& getCapabilities() const;
113 
114     /**
115      * Gets information about extensions supported by the driver implementation.
116      *
117      * Extensions of category ExtensionCategory::BASE must not appear
118      * in the list.
119      *
120      * All extension operations and operands must be fully supported for the
121      * extension to appear in the list of supported extensions.
122      *
123      * @return extensions A list of supported extensions.
124      */
125     const std::vector<hal::Extension>& getSupportedExtensions() const;
126 
127     /**
128      * Gets the supported operations in a MetaModel.
129      *
130      * getSupportedOperations indicates which operations of
131      * MetaModel::getModel() are fully supported by the vendor driver. If an
132      * operation may not be supported for any reason, getSupportedOperations
133      * must return false for that operation.
134      *
135      * @param metaModel A MetaModel whose operations--and their corresponding
136      *                  operands--are to be verified by the driver.  When
137      *                  metaModel.getModel() is not compliant with the HAL
138      *                  version of the vendor driver, the MetaModel's slicing
139      *                  functionality (MetaModel::getSlice*()) is employed
140      *                  to query the vendor driver about which of the subset of
141      *                  compliant operations are supported.  See the MetaModel
142      *                  class in MetaModel.h for more details.
143      * @return status Error status of the call, must be:
144      *                - NONE if successful
145      *                - DEVICE_UNAVAILABLE if driver is offline or busy
146      *                - GENERAL_FAILURE if there is an unspecified error
147      *                - INVALID_ARGUMENT if provided model is invalid
148      * @return supportedOperations A list of supported operations, where true
149      *                             indicates the operation is supported and
150      *                             false indicates the operation is not
151      *                             supported. The index of "supported"
152      *                             corresponds with the index of the operation
153      *                             it is describing.
154      */
155     std::pair<hal::ErrorStatus, hal::hidl_vec<bool>> getSupportedOperations(
156             const MetaModel& metaModel) const;
157 
158     /**
159      * Creates a prepared model for execution.
160      *
161      * prepareModel is used to make any necessary transformations or alternative
162      * representations to a model for execution, possibly including
163      * transformations on the constant data, optimization on the model's graph,
164      * or compilation into the device's native binary format. The model itself
165      * is not changed.
166      *
167      * Optionally, caching information may be provided for the driver to either:
168      * - load the prepared model from cache, bypassing full model preparation
169      * - save the prepared model to cache for faster model compilation time when
170      *     the same model preparation is requested in the future
171      *
172      * The prepareModel function must verify the inputs to the prepareModel
173      * function are correct. If there is an error, prepareModel must immediately
174      * return the appropriate result code and nullptr for the
175      * VersionedIPreparedModel. If the inputs to the prepareModel function are
176      * valid and there is no error, prepareModel must prepare the model.
177      *
178      * If the model was prepared successfully, prepareModel must return
179      * ANEURALNETWORKS_NO_ERROR and the produced VersionedIPreparedModel object.
180      * If an error occurred preparing the model, prepareModel must return the
181      * appropriate result code and nullptr for the VersionedIPreparedModel.
182      *
183      * The only information that may be unknown to the model at this stage is
184      * the shape of the tensors, which may only be known at execution time. As
185      * such, some driver services may return partially prepared models, where
186      * the prepared model may only be finished when it is paired with a set of
187      * inputs to the model. Note that the same prepared model object may be
188      * used with different shapes of inputs on different (possibly concurrent)
189      * executions.
190      *
191      * Multiple threads may call prepareModel on the same model concurrently.
192      *
193      * @param makeModel Factory function to create the model to be prepared for
194      *     execution.
195      * @param preference Indicates the intended execution behavior of a prepared
196      *     model.
197      * @param priority Priority of the prepared model relative to other prepared
198      *     models owned by an application.
199      * @param deadline Optional time point. If provided, prepareModel is
200      *     expected to complete by this time point. If it is not able to be
201      *     completed by the deadline, the execution may be aborted.
202      * @param cacheDir String specifying the cache directory.
203      * @param maybeToken An optional caching token of length
204      *     Constant::BYTE_SIZE_OF_CACHE_TOKEN identifying the prepared model.
205      *     The same token will be provided when retrieving the prepared model
206      *     from the cache files with prepareModelFromCache. Tokens should be
207      *     chosen to have a low rate of collision for a particular application.
208      *     The driver cannot detect a collision; a collision will result in a
209      *     failed execution or in a successful execution that produces incorrect
210      *     output values. If both modelCache and dataCache are empty indicating
211      *     that caching information is not provided, this token must be ignored.
212      * @return A pair of:
213      *     - Result code of preparing the model; must be:
214      *         - ANEURALNETWORKS_NO_ERROR if preparation succeeded
215      *         - ANEURALNETWORKS_UNAVAILABLE_DEVICE if driver is offline or busy
216      *         - ANEURALNETWORKS_OP_FAILED if there is an unspecified error
217      *         - ANEURALNETWORKS_BAD_DATA if one of the input arguments related
218      *             to preparing the model is invalid
219      *     - preparedModel A VersionedIPreparedModel object representing a model
220      *         that has been prepared for execution, else nullptr.
221      */
222     std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModel(
223             const hal::ModelFactory& makeModel, hal::ExecutionPreference preference, hal::Priority,
224             const std::optional<Deadline>& deadline, const std::string& cacheDir,
225             const std::optional<hal::CacheToken>& maybeToken) const;
226 
227     /**
228      * Returns the feature level of a driver.
229      *
230      * @return featureLevel The API level of the most advanced feature this driver implements.
231      *                      For example, if the driver implements the features introduced in
232      *                      Android P, the value would be 28.
233      *                      Return -1 if the driver is offline or busy, or the query resulted in
234      *                      an unspecified error.
235      */
236     int64_t getFeatureLevel() const;
237 
238     /**
239      * Returns the device type of a driver.
240      *
241      * @return deviceType The type of a given device, which can help application
242      *     developers to distribute Machine Learning workloads and other
243      *     workloads such as graphical rendering. E.g., for an app which renders
244      *     AR scenes based on real time object detection results, the developer
245      *     could choose an ACCELERATOR type device for ML workloads, and reserve
246      *     GPU for graphical rendering.
247      */
248     int32_t getType() const;
249 
250     /**
251      * Get the version string of the driver implementation.
252      *
253      * The version string must be a unique token among the set of version strings of
254      * drivers of a specific device. The token identifies the device driver's
255      * implementation. The token must not be confused with the feature level which is solely
256      * defined by the interface version. This API is opaque to the Android framework, but the
257      * Android framework may use the information for debugging or to pass on to NNAPI applications.
258      *
259      * Application developers sometimes have specific requirements to ensure good user experiences,
260      * and they need more information to make intelligent decisions when the Android framework
261      * cannot. For example, combined with the device name and other information, the token can help
262      * NNAPI applications filter devices based on their needs:
263      *     - An application demands a certain level of performance, but a specific version of
264      *       the driver cannot meet that requirement because of a performance regression.
265      *       The application can disallow the driver based on the version provided.
266      *     - An application has a minimum precision requirement, but certain versions of
267      *       the driver cannot meet that requirement because of bugs or certain optimizations.
268      *       The application can filter out versions of these drivers.
269      *
270      * @return version The version string of the device implementation.
271      */
272     const std::string& getVersionString() const;
273 
274     /**
275      * Gets the caching requirements of the driver implementation.
276      *
277      * There are two types of cache file descriptors provided to the driver: model cache
278      * and data cache.
279      *
280      * The data cache is for caching constant data, possibly including preprocessed
281      * and transformed tensor buffers. Any modification to the data cache should
282      * have no worse effect than generating bad output values at execution time.
283      *
284      * The model cache is for caching security-sensitive data such as compiled
285      * executable machine code in the device's native binary format. A modification
286      * to the model cache may affect the driver's execution behavior, and a malicious
287      * client could make use of this to execute beyond the granted permission. Thus,
288      * the driver must always check whether the model cache is corrupted before
289      * preparing the model from cache.
290      *
291      * getNumberOfCacheFilesNeeded returns how many of each type of cache files the driver
292      * implementation needs to cache a single prepared model. Returning 0 for both types
293      * indicates compilation caching is not supported by this driver. The driver may
294      * still choose not to cache certain compiled models even if it reports that caching
295      * is supported.
296      *
297      * If the device reports that caching is not supported, the user may avoid calling
298      * IDevice::prepareModelFromCache or providing cache file descriptors to
299      * IDevice::prepareModel_1_2.
300      *
301      * @return numModelCache An unsigned integer indicating how many files for model cache
302      *                       the driver needs to cache a single prepared model. It must
303      *                       be less than or equal to Constant::MAX_NUMBER_OF_CACHE_FILES.
304      * @return numDataCache An unsigned integer indicating how many files for data cache
305      *                      the driver needs to cache a single prepared model. It must
306      *                      be less than or equal to Constant::MAX_NUMBER_OF_CACHE_FILES.
307      */
308     std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const;
309 
310     /**
311      * Returns the name of the service.
312      *
313      * @return Name of the service.
314      */
315     const std::string& getName() const;
316 
317     /**
318      * Allocates a driver-managed buffer with the properties specified by the descriptor as well as
319      * the input and output roles of prepared models.
320      *
321      * The allocate function must verify the inputs to the allocate function are correct. If there
322      * is an error, or if a certain role or property is not supported by the driver, the allocate
323      * function must return with an appropriate ErrorStatus, a nullptr as the IBuffer, and 0 as the
324      * buffer token. If the allocation is successful, this method must return with ErrorStatus::NONE
325      * and the produced IBuffer with a positive token identifying the allocated buffer. A successful
326      * allocation must accommodate all of the specified roles and buffer properties.
327      *
328      * The buffer is allocated as an uninitialized state. An uninitialized buffer may only be used
329      * in ways that are specified by outputRoles. A buffer is initialized after it is used as an
330      * output in a successful execution, or after a successful invocation of IBuffer::copyFrom on
331      * the buffer. An initialized buffer may be used according to all roles specified in inputRoles
332      * and outputRoles. A buffer will return to the uninitialized state if it is used as an output
333      * in a failed execution, or after a failed invocation of IBuffer::copyFrom on the buffer.
334      *
335      * The driver may deduce the dimensions of the buffer according to the buffer descriptor as
336      * well as the input and output roles. The dimensions or rank of the buffer may be unknown at
337      * this stage. As such, some driver services may only create a placeholder and defer the actual
338      * allocation until execution time. Note that the same buffer may be used for different shapes
339      * of outputs on different executions. When the buffer is used as an input, the input shape
340      * must be the same as the output shape from the last execution using this buffer as an output.
341      *
342      * The driver must apply proper validatation upon every usage of the buffer, and fail the
343      * execution immediately if the usage is illegal.
344      *
345      * @param desc A buffer descriptor specifying the properties of the buffer to allocate.
346      * @param preparedModels A vector of IPreparedModel objects. Must only contain IPreparedModel
347      *     objects from the same IDevice as this method invoked on.
348      * @param inputRoles A vector of roles with each specifying an input to a prepared model.
349      * @param outputRoles A vector of roles with each specifying an output to a prepared model.
350      *     Each role specified in inputRoles and outputRoles must be unique. The corresponding
351      *     model operands of the roles must have the same OperandType, scale, zero point, and
352      *     ExtraParams. The dimensions of the operands and the dimensions specified in the buffer
353      *     descriptor must be compatible with each other. Two dimensions are incompatible if there
354      *     is at least one axis that is fully specified in both but has different values.
355      * @return A tuple consisting of:
356      *     - Error status of the buffer allocation. Must be:
357      *         - NONE if successful
358      *         - DEVICE_UNAVAILABLE if driver is offline or busy
359      *         - GENERAL_FAILURE if a certain buffer property or a certain role is not supported,
360      *           or if there is an unspecified error
361      *         - INVALID_ARGUMENT if one of the input arguments is invalid
362      *     - The allocated IBuffer object. If the buffer was unable to be allocated
363      *       due to an error, nullptr must be returned.
364      *     - A positive token identifying the allocated buffer. The same token will be
365      *       provided when referencing the buffer as one of the memory pools in the request of an
366      *       execution. If the buffer was unable to be allocated due to an error, the token must be
367      *       0.
368      */
369     std::tuple<hal::ErrorStatus, sp<hal::IBuffer>, uint32_t> allocate(
370             const hal::BufferDesc& desc,
371             const std::vector<std::shared_ptr<VersionedIPreparedModel>>& preparedModels,
372             const hal::hidl_vec<hal::BufferRole>& inputRoles,
373             const hal::hidl_vec<hal::BufferRole>& outputRoles) const;
374 
375     /**
376      * Blocks until the device is not in a bad state.
377      *
378      * @return Error code after waiting. ANEURALNETWORKS_NO_ERROR if device is
379      *     not in a bad state.
380      */
381     int wait() const;
382 
383    private:
384     // Cached initialization results.
385     const hal::Capabilities kCapabilities;
386     const std::vector<hal::Extension> kSupportedExtensions;
387     const int32_t kType;
388     const std::string kVersionString;
389     const std::pair<uint32_t, uint32_t> kNumberOfCacheFilesNeeded;
390 
391     // internal methods to prepare a model
392     std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModelInternal(
393             const hal::Model& model, hal::ExecutionPreference preference, hal::Priority priority,
394             const std::optional<Deadline>& deadline, const std::string& cacheDir,
395             const std::optional<hal::CacheToken>& maybeToken) const;
396     std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModelFromCacheInternal(
397             const std::optional<Deadline>& deadline, const std::string& cacheDir,
398             const hal::CacheToken& token) const;
399 
400     /**
401      * This is a utility class for VersionedIDevice that encapsulates a
402      * V1_0::IDevice, any appropriate downcasts to newer interfaces, and a
403      * hidl_death_recipient that will proactively handle the case when the
404      * service containing the IDevice object crashes.
405      *
406      * This is a convenience class to help VersionedIDevice recover from an
407      * IDevice object crash: It bundles together all the data that needs to
408      * change when recovering from a crash, and simplifies the process of
409      * instantiating that data (at VersionedIDevice creation time) and
410      * re-instantiating that data (at crash recovery time).
411      */
412     class Core {
413        public:
414         /**
415          * Constructor for the Core object.
416          *
417          * Core is constructed with a V1_0::IDevice object, which represents a
418          * device that is at least v1.0 of the interface. The constructor
419          * downcasts to the latest version of the IDevice interface, allowing
420          * VersionedIDevice to default to using the latest version of all
421          * IDevice interface methods automatically.
422          *
423          * @param device A device object that is at least version 1.0 of the IDevice
424          *               interface.
425          * @param deathHandler A hidl_death_recipient that will proactively handle
426          *                     the case when the service containing the IDevice
427          *                     object crashes.
428          */
429         Core(sp<hal::V1_0::IDevice> device, sp<IDeviceDeathHandler> deathHandler);
430 
431         /**
432          * Destructor for the Core object.
433          *
434          * This destructor unlinksToDeath this object's hidl_death_recipient as it
435          * no longer needs to handle the case where the IDevice's service crashes.
436          */
437         ~Core();
438 
439         // Support move but not copy
440         Core(Core&&) noexcept;
441         Core& operator=(Core&&) noexcept;
442         Core(const Core&) = delete;
443         Core& operator=(const Core&) = delete;
444 
445         /**
446          * Create a Core object.
447          *
448          * Prefer using this function over the constructor, as it adds more
449          * protections.
450          *
451          * This call linksToDeath a hidl_death_recipient that can
452          * proactively handle the case when the service containing the IDevice
453          * object crashes.
454          *
455          * @param device A device object that is at least version 1.0 of the IDevice
456          *               interface.
457          * @return A valid Core object, otherwise nullopt.
458          */
459         static std::optional<Core> create(sp<hal::V1_0::IDevice> device);
460 
461         /**
462          * Returns sp<*::IDevice> that is a downcast of the sp<V1_0::IDevice>
463          * passed to the constructor.  This will be nullptr if that IDevice is
464          * not actually of the specified downcast type.
465          */
466         template <typename T_IDevice>
467         sp<T_IDevice> getDevice() const;
468         template <>
getDevice()469         sp<hal::V1_0::IDevice> getDevice() const {
470             return mDeviceV1_0;
471         }
472         template <>
getDevice()473         sp<hal::V1_1::IDevice> getDevice() const {
474             return mDeviceV1_1;
475         }
476         template <>
getDevice()477         sp<hal::V1_2::IDevice> getDevice() const {
478             return mDeviceV1_2;
479         }
480         template <>
getDevice()481         sp<hal::V1_3::IDevice> getDevice() const {
482             return mDeviceV1_3;
483         }
484 
485         /**
486          * Returns sp<*::IDevice> (as per getDevice()) and the
487          * hidl_death_recipient that will proactively handle the case when the
488          * service containing the IDevice object crashes.
489          */
490         template <typename T_IDevice>
491         std::pair<sp<T_IDevice>, sp<IDeviceDeathHandler>> getDeviceAndDeathHandler() const;
492 
493        private:
494         /**
495          * All versions of IDevice are necessary because the driver could be v1.0,
496          * v1.1, or a later version. All these pointers logically represent the same
497          * object.
498          *
499          * The general strategy is: HIDL returns a V1_0 device object, which
500          * (if not nullptr) could be v1.0, v1.1, or a greater version. The V1_0
501          * object is then "dynamically cast" to a V1_1 object. If successful,
502          * mDeviceV1_1 will point to the same object as mDeviceV1_0; otherwise,
503          * mDeviceV1_1 will be nullptr.
504          *
505          * In general:
506          * * If the device is truly v1.0, mDeviceV1_0 will point to a valid object
507          *   and mDeviceV1_1 will be nullptr.
508          * * If the device is truly v1.1 or later, both mDeviceV1_0 and mDeviceV1_1
509          *   will point to the same valid object.
510          *
511          * Idiomatic usage: if mDeviceV1_1 is non-null, do V1_1 dispatch; otherwise,
512          * do V1_0 dispatch.
513          */
514         sp<hal::V1_0::IDevice> mDeviceV1_0;
515         sp<hal::V1_1::IDevice> mDeviceV1_1;
516         sp<hal::V1_2::IDevice> mDeviceV1_2;
517         sp<hal::V1_3::IDevice> mDeviceV1_3;
518 
519         /**
520          * HIDL callback to be invoked if the service for mDeviceV1_0 crashes.
521          *
522          * nullptr if this Core instance is a move victim and hence has no
523          * callback to be unlinked.
524          */
525         sp<IDeviceDeathHandler> mDeathHandler;
526     };
527 
528     // This method retrieves the appropriate mCore.mDevice* field, under a read lock.
529     template <typename T_IDevice>
getDevice()530     sp<T_IDevice> getDevice() const EXCLUDES(mMutex) {
531         std::shared_lock lock(mMutex);
532         return mCore.getDevice<T_IDevice>();
533     }
534 
535     // This method retrieves the appropriate mCore.mDevice* fields, under a read lock.
536     template <typename T_IDevice>
getDeviceAndDeathHandler()537     auto getDeviceAndDeathHandler() const EXCLUDES(mMutex) {
538         std::shared_lock lock(mMutex);
539         return mCore.getDeviceAndDeathHandler<T_IDevice>();
540     }
541 
542     // This method calls the function fn in a manner that supports recovering
543     // from a driver crash: If the driver implementation is dead because the
544     // driver crashed either before the call to fn or during the call to fn, we
545     // will attempt to obtain a new instance of the same driver and call fn
546     // again.
547     //
548     // If a callback is provided, this method protects it against driver death
549     // and waits for it (callback->wait()).
550     template <typename T_Return, typename T_IDevice, typename T_Callback = std::nullptr_t>
551     hal::Return<T_Return> recoverable(
552             const char* context,
553             const std::function<hal::Return<T_Return>(const sp<T_IDevice>&)>& fn,
554             const T_Callback& callback = nullptr) const EXCLUDES(mMutex);
555 
556     // The name of the service that implements the driver.
557     const std::string kServiceName;
558 
559     // Factory function object to generate an IDevice object.
560     const hal::DeviceFactory kMakeDevice;
561 
562     // Guards access to mCore.
563     mutable std::shared_mutex mMutex;
564 
565     // Data that can be rewritten during driver recovery.  Guarded againt
566     // synchronous access by a mutex: Any number of concurrent read accesses is
567     // permitted, but a write access excludes all other accesses.
568     mutable Core mCore GUARDED_BY(mMutex);
569 };
570 
571 /** This class wraps an IPreparedModel object of any version. */
572 class VersionedIPreparedModel {
573     DISALLOW_IMPLICIT_CONSTRUCTORS(VersionedIPreparedModel);
574 
575    public:
576     /**
577      * Constructor for the VersionedIPreparedModel object.
578      *
579      * This constructor should not be used directly. Instead,
580      * VersionedIPreparedModel should be created via
581      * VersionedIDevice::prepareModel*.
582      *
583      * VersionedIPreparedModel is constructed with the V1_0::IPreparedModel object, which
584      * represents a device that is at least v1.0 of the interface. The constructor downcasts
585      * to the latest version of the IPreparedModel interface, and will default to using the
586      * latest version of all IPreparedModel interface methods automatically.
587      *
588      * @param preparedModel A prepared model object that is at least version 1.0 of the
589      *                      IPreparedModel interface.
590      * @param deathHandler A hidl_death_recipient that will proactively handle
591      *                     the case when the service containing the IDevice
592      *                     object crashes.
593      */
594     VersionedIPreparedModel(sp<hal::V1_0::IPreparedModel> preparedModel,
595                             sp<IPreparedModelDeathHandler> deathHandler);
596 
597     /**
598      * Destructor for the VersionedIPreparedModel object.
599      *
600      * This destructor unlinksToDeath this object's hidl_death_recipient as it
601      * no longer needs to handle the case where the IPreparedModel's service
602      * crashes.
603      */
604     ~VersionedIPreparedModel();
605 
606     /**
607      * Performs a synchronous execution on a prepared model.
608      *
609      * The execution is performed synchronously with respect to the caller.
610      * VersionedIPreparedModel::execute must verify the inputs to the function
611      * are correct. If there is an error, VersionedIPreparedModel::execute must
612      * immediately return with the appropriate result code. If the inputs to the
613      * function are valid and there is no error,
614      * VersionedIPreparedModel::execute must perform the execution, and must not
615      * return until the execution is complete.
616      *
617      * If the prepared model was prepared from a model wherein all tensor
618      * operands have fully specified dimensions, and the inputs to the function
619      * are valid, and at execution time every operation's input operands have
620      * legal values, then the execution should complete successfully
621      * (ANEURALNETWORKS_NO_ERROR): There must be no failure unless the device
622      * itself is in a bad state.
623      *
624      * execute may be called with an optional deadline. If the execution is not
625      * able to be completed before the provided deadline, the execution may be
626      * aborted, and either {@link ErrorStatus::MISSED_DEADLINE_TRANSIENT} or
627      * {@link ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The
628      * error due to an abort must be sent the same way as other errors,
629      * described above.
630      *
631      * Any number of calls to the VersionedIPreparedModel::execute function, in
632      * any combination, may be made concurrently, even on the same
633      * VersionedIPreparedModel object.
634      *
635      * @param request The input and output information on which the prepared
636      *     model is to be executed.
637      * @param measure Specifies whether or not to measure duration of the
638      *     execution.
639      * @param deadline Optional time point. If provided, prepareModel is
640      *     expected to complete by this time point. If it is not able to be
641      *     completed by the deadline, the execution may be aborted.
642      * @param loopTimeoutDuration The maximum amount of time that should be spent
643      *     executing a {@link OperationType::WHILE} operation. If a loop
644      *     condition model does not output false within this duration, the
645      *     execution must be aborted. If no loop timeout duration is provided,
646      *     the maximum amount of time is {@link LoopTimeoutDurationNs::DEFAULT}.
647      *     When provided, the duration must not exceed {@link
648      *     LoopTimeoutDurationNs::MAXIMUM}.
649      * @param preferSynchronous 'true' to perform synchronous HAL execution when
650      *     possible, 'false' to force asynchronous HAL execution.
651      * @return A tuple consisting of:
652      *     - Result code of the execution, must be:
653      *         - ANEURALNETWORKS_NO_ERROR if execution is performed successfully
654      *         - ANEURALNETWORKS_UNAVAILABLE_DEVICE if driver is offline or busy
655      *         - ANEURALNETWORKS_OP_FAILED if there is an unspecified error
656      *         - ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE if at least one output
657      *             operand buffer is not large enough to store the corresponding
658      *             output
659      *         - ANEURALNETWORKS_BAD_DATA if one of the input arguments is
660      *             invalid
661      *     - A list of shape information of model output operands.
662      *         The index into "outputShapes" corresponds to the index of the
663      *         output operand in the Request outputs vector. outputShapes must
664      *         be empty unless the result code is either
665      *         ANEURALNETWORKS_NO_ERROR or
666      *         ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE. outputShapes may be
667      *         empty if the result code is ANEURALNETWORKS_NO_ERROR and all
668      *         model output operands are fully-specified at execution time.
669      *         outputShapes must have the same number of elements as the number
670      *         of model output operands if the result code is
671      *         ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE, or if the result code
672      *         is ANEURALNETWORKS_NO_ERROR and the model has at least one output
673      *         operand that is not fully-specified.
674      *     - Duration of execution. Unless measure is YES and result code is
675      *         ANEURALNETWORKS_NO_ERROR, all times must be reported as
676      *         UINT64_MAX. A driver may choose to report any time as UINT64_MAX,
677      *         indicating that measurement is not available.
678      */
679     std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> execute(
680             const hal::Request& request, hal::MeasureTiming measure,
681             const std::optional<Deadline>& deadline,
682             const hal::OptionalTimeoutDuration& loopTimeoutDuration, bool preferSynchronous) const;
683 
684     /**
685      * Creates a burst controller on a prepared model.
686      *
687      * @param preferPowerOverLatency 'true' if the Burst object should run in a
688      *                               more power efficient mode, 'false' if more
689      *                               power can be used to possibly reduce
690      *                               burst compute latency.
691      * @return ExecutionBurstController Execution burst controller object.
692      *                                  nullptr is returned if the burst cannot
693      *                                  be configured for any reason.
694      */
695     std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
696             bool preferPowerOverLatency) const;
697 
698     /**
699      * Launch a fenced asynchronous execution on a prepared model.
700      *
701      * The execution is performed asynchronously with respect to the caller.
702      * executeFenced must fully validate the request. If there is an error during validation,
703      * executeFenced must immediately return with the corresponding ErrorStatus. If the inputs
704      * to the function are valid and there is no error and there is no error launching,
705      * executeFenced must dispatch an asynchronous task to perform the execution in the
706      * background, and immediately return with ErrorStatus::NONE, a sync fence that will be
707      * signaled once the execution is completed, and a callback that can be used by the client
708      * to query the duration and runtime error status. If the task has finished
709      * before the call returns, empty handle may be returned for the syncFence. If the
710      * asynchronous task fails to launch, executeFenced must immediately return with
711      * ErrorStatus::GENERAL_FAILURE, an empty handle for the syncFence, and nullptr
712      * for callback. The execution must wait for all the sync fences (if any) in waitFor to be
713      * signaled before starting the actual execution.
714      *
715      * If any of sync fences in waitFor changes to error status after the executeFenced
716      * call succeeds, the driver must immediately set the returned syncFence to error status.
717      *
718      * When the asynchronous task has finished its execution, it must
719      * immediately signal the syncFence returned from executeFenced call. After
720      * the syncFence is signaled, the task must not modify the content of
721      * any data object referenced by 'request' (described by the
722      * {@link @1.0::DataLocation} of a {@link @1.0::RequestArgument}).
723      *
724      * executeFenced may be called with an optional deadline and an optional
725      * timeoutDurationAfterFence. If the execution is not able to be completed
726      * before the provided deadline or within the timeoutDurationAfterFence,
727      * whichever comes earlier, the execution may be aborted, and either {@link
728      * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
729      * ErrorStatus::MISSED_DEADLINE_PERSISTENT} may be returned. The error due
730      * to an abort must be sent the same way as other errors, described above.
731      *
732      * Any number of calls to the executeFenced, execute* and executeSynchronously*
733      * functions, in any combination, may be made concurrently, even on the same
734      * IPreparedModel object.
735      *
736      * @param request The input and output information on which the prepared
737      *                model is to be executed.
738      * @param waitFor A vector of sync fence file descriptors. The execution must
739      *                wait for all sync fence to be signaled before starting the
740      *                task.
741      * @param measure Specifies whether or not to measure duration of the execution.
742      * @param deadline The time by which execution is expected to complete. If
743      *                 the execution cannot be finished by the deadline, the
744      *                 execution may be aborted.
745      * @param loopTimeoutDuration The maximum amount of time that should be spent
746      *     executing a {@link OperationType::WHILE} operation. If a loop
747      *     condition model does not output false within this duration, the
748      *     execution must be aborted. If no loop timeout duration is provided,
749      *     the maximum amount of time is {@link LoopTimeoutDurationNs::DEFAULT}.
750      *     When provided, the duration must not exceed {@link
751      *     LoopTimeoutDurationNs::MAXIMUM}.
752      * @param timeoutDurationAfterFence The timeout duration within which the
753      *                                  execution is expected to complete after
754      *                                  all sync fences in waitFor are signaled.
755      * @return A tuple consisting of:
756      *         - Error code of the dispatch call.
757      *         - A sync_fence that will be triggered when the task is completed.
758      *           The sync_fence will be set to error if critical error occurs when doing
759      *           actual evaluation.
760      *         - A callback can be used to query information like duration
761      *           and detailed runtime error status when the task is completed.
762      *         - Optional timing information. Only useful if the call is simulated using
763      *           sync execution. Either IFencedExecutionCallback will be
764      *           returned or optional timing information is returned
765      */
766     std::tuple<int, hal::hidl_handle, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
767             const hal::Request& request, const hal::hidl_vec<hal::hidl_handle>& waitFor,
768             hal::MeasureTiming measure, const std::optional<Deadline>& deadline,
769             const hal::OptionalTimeoutDuration& loopTimeoutDuration,
770             const hal::OptionalTimeoutDuration& timeoutDurationAfterFence);
771 
772    private:
773     friend class VersionedIDevice;
774 
775     std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeAsynchronously(
776             const hal::Request& request, hal::MeasureTiming timing,
777             const std::optional<Deadline>& deadline,
778             const hal::OptionalTimeoutDuration& loopTimeoutDuration) const;
779     std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeSynchronously(
780             const hal::Request& request, hal::MeasureTiming measure,
781             const std::optional<Deadline>& deadline,
782             const hal::OptionalTimeoutDuration& loopTimeoutDuration) const;
783 
784     /**
785      * Returns sp<V1_3::IPreparedModel> that is a downcast of the sp<V1_0::IPreparedModel>
786      * passed to the constructor.  This will be nullptr if that IPreparedModel is
787      * not actually of the specified downcast type.
788      */
getV1_3()789     sp<hal::V1_3::IPreparedModel> getV1_3() const { return mPreparedModelV1_3; }
790 
791     /**
792      * All versions of IPreparedModel are necessary because the preparedModel could be v1.0,
793      * v1.2, or a later version. All these pointers logically represent the same object.
794      *
795      * The general strategy is: HIDL returns a V1_0 prepared model object, which
796      * (if not nullptr) could be v1.0, v1.2, or a greater version. The V1_0
797      * object is then "dynamically cast" to objects of later versions. If successful,
798      * mPreparedModel* will point to the same object as mPreparedModelV1_0; otherwise,
799      * mPreparedModel* will be nullptr.
800      *
801      * In general:
802      * * If the prepared model is truly v1.0, mPreparedModelV1_0 will point to a valid object,
803      *   both mPreparedModelV1_2 and mPreparedModelV1_3 will be nullptr.
804      * * If the prepared model is truly v1.2, both mPreparedModelV1_0 and mPreparedModelV1_2
805      *   will point to the same valid object, but mPreparedModelV1_3 will be nullptr.
806      * * If the prepared model is truly v1.3 or later, all of mPreparedModelV1_0,
807      *   mPreparedModelV1_2, and mPreparedModelV1_3 will point to the same valid object.
808      *
809      * Idiomatic usage: if mPreparedModelV1_3 is non-null, do V1_3 dispatch;
810      *       otherwise, if mPreparedModelV1_2 is non-null, do V1_2 dispatch;
811      *       otherwise, do V1_0 dispatch.
812      */
813     sp<hal::V1_0::IPreparedModel> mPreparedModelV1_0;
814     sp<hal::V1_2::IPreparedModel> mPreparedModelV1_2;
815     sp<hal::V1_3::IPreparedModel> mPreparedModelV1_3;
816 
817     /**
818      * HIDL callback to be invoked if the service for mPreparedModelV1_0 crashes.
819      */
820     const sp<IPreparedModelDeathHandler> mDeathHandler;
821 };
822 
823 }  // namespace nn
824 }  // namespace android
825 
826 #endif  // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_VERSIONED_INTERFACES_H
827