1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H
18 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H
19 
#include <android-base/macros.h>

#include <cstdint>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <unordered_set>
#include <utility>
#include <vector>

#include "Callbacks.h"
#include "HalInterfaces.h"
#include "Memory.h"
#include "Utils.h"
34 
35 namespace android {
36 namespace nn {
37 
// Forward declarations.  The interfaces in this header only mention these
// types through pointers, references, and smart pointers, so their complete
// definitions are not required here.
class Device;  // Defined later in this header.
class ExecutionBurstController;
class MetaModel;
class ModelArgumentInfo;
class VersionedIPreparedModel;
44 
45 // A unified interface for actual driver prepared model as well as the CPU.
46 class PreparedModel {
47     DISALLOW_COPY_AND_ASSIGN(PreparedModel);
48 
49    public:
50     PreparedModel() = default;
51     virtual ~PreparedModel() = default;
52 
53     virtual const Device* getDevice() const = 0;
54     virtual std::shared_ptr<VersionedIPreparedModel> getInterface() const = 0;
55 
56     // Perform computation with given input/output argument info and memory pools.
57     virtual std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> execute(
58             const std::vector<ModelArgumentInfo>& inputs,
59             const std::vector<ModelArgumentInfo>& outputs,
60             const std::vector<const Memory*>& memories,
61             const std::shared_ptr<ExecutionBurstController>& burstController,
62             hal::MeasureTiming measure, const std::optional<Deadline>& deadline,
63             const hal::OptionalTimeoutDuration& loopTimeoutDuration) const = 0;
64 
65     // Perform fenced computation with given input/output argument info and memory pools.
66     // The returned timing information is only valid if the callback is nullptr.
67     // Returns error_code, sync_fence, callback and timing.
68     virtual std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
69             const std::vector<ModelArgumentInfo>& inputs,
70             const std::vector<ModelArgumentInfo>& outputs,
71             const std::vector<const Memory*>& memories, const std::vector<int>& waitFor,
72             hal::MeasureTiming measure, const std::optional<Deadline>& deadline,
73             const hal::OptionalTimeoutDuration& loopTimeoutDuration,
74             const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const = 0;
75 
76     virtual std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
77             bool preferPowerOverLatency) const = 0;
78 };
79 
80 // A unified interface for actual driver devices as well as the CPU
81 class Device {
82     DISALLOW_COPY_AND_ASSIGN(Device);
83 
84    public:
85     Device() = default;
86     virtual ~Device() = default;
87 
88     // Introspection methods returning device information
89     virtual const std::string& getName() const = 0;
90     virtual const std::string& getVersionString() const = 0;
91     virtual int64_t getFeatureLevel() const = 0;
92     virtual int32_t getType() const = 0;
93     virtual const std::vector<hal::Extension>& getSupportedExtensions() const = 0;
94 
95     // See the MetaModel class in MetaModel.h for more details.
96     virtual std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const = 0;
97 
98     virtual hal::PerformanceInfo getPerformance(hal::OperandType type) const = 0;
99     virtual hal::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const = 0;
100     virtual hal::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const = 0;
101     virtual hal::PerformanceInfo getIfPerformance() const = 0;
102     virtual hal::PerformanceInfo getWhilePerformance() const = 0;
103     virtual bool isCachingSupported() const = 0;
104     virtual int wait() const = 0;
105 
106     virtual std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
107             const hal::ModelFactory& makeModel, hal::ExecutionPreference preference,
108             hal::Priority priority, const std::optional<Deadline>& deadline,
109             const std::string& cacheDir,
110             const std::optional<hal::CacheToken>& maybeToken) const = 0;
111 
112     // The caller is responsible for making sure the MemoryDescriptor only contains PreparedModels
113     // from the same Device.
114     virtual std::pair<int, std::unique_ptr<Memory>> allocate(const MemoryDescriptor& desc,
115                                                              hal::OperandType type) const = 0;
116 };
117 
118 // Manages the NN HAL devices.  Only one instance of this class will exist.
119 // Use get() to retrieve it.
120 class DeviceManager {
121    public:
getDrivers()122     const std::vector<std::shared_ptr<Device>>& getDrivers() const {
123         if (mSetCpuOnly || mDebugNNCpuOnly) {
124             return mDevicesCpuOnly;
125         }
126         return mDevices;
127     }
128 
129     // For testing only:
setUseCpuOnly(bool useCpuOnly)130     void setUseCpuOnly(bool useCpuOnly) { mSetCpuOnly = useCpuOnly; }
getUseCpuOnly()131     bool getUseCpuOnly() const { return mSetCpuOnly; }
setSyncExecHal(bool val)132     void setSyncExecHal(bool val) {
133         mSyncExecHal = val;
134         mSyncExecHalSetter = true;
135     }
136 
syncExecCpu()137     bool syncExecCpu() const { return mSyncExecCpu; }
syncExecHal()138     bool syncExecHal() const { return mSyncExecHal; }
syncExecRuntime()139     bool syncExecRuntime() const { return mSyncExecRuntime; }
140 
141     // How to handle graph partitioning?
142     // 0 - Don't do graph partitioning.
143     // 1 - Do graph partitioning; but fall back to non-partitioned
144     //     execution if there is a partitioning failure.
145     // 2 - Do graph partitioning, and rely on it; there is no fallback.
146     enum { kPartitioningNo = 0, kPartitioningWithFallback = 1, kPartitioningWithoutFallback = 2 };
getPartitioning()147     uint32_t getPartitioning() const { return mPartitioning; }
partitioningAllowsFallback(uint32_t partitioning)148     static bool partitioningAllowsFallback(uint32_t partitioning) {
149         return partitioning == kPartitioningWithFallback;
150     }
151 
strictSlicing()152     bool strictSlicing() const { return mStrictSlicing; }
153 
154     // Returns the singleton manager.
155     static DeviceManager* get();
156 
157     // Returns the singleton Cpu device.
158     static std::shared_ptr<Device> getCpuDevice();
159 
160     // The forTest_* functions below are solely intended for use by unit tests.
161 
162     // Returns all devices (ignores the cpu-only flags).
forTest_getDevices()163     std::vector<std::shared_ptr<Device>> forTest_getDevices() const { return mDevices; }
164 
165     // Sets the device list (does not affect cpu-only queries).
forTest_setDevices(std::vector<std::shared_ptr<Device>> devices)166     void forTest_setDevices(std::vector<std::shared_ptr<Device>> devices) {
167         mDevices = std::move(devices);
168     }
169 
170     // Register a test device.
forTest_registerDevice(const std::string & name,const sp<hal::V1_0::IDevice> & device)171     void forTest_registerDevice(const std::string& name, const sp<hal::V1_0::IDevice>& device) {
172         const hal::DeviceFactory makeDevice = [device](bool /*blocking*/) { return device; };
173         registerDevice(name, makeDevice);
174     }
175 
176     // Re-initialize the list of available devices.
forTest_reInitializeDeviceList()177     void forTest_reInitializeDeviceList() {
178         mDevices.clear();
179         mDevicesCpuOnly.clear();
180         findAvailableDevices();
181     }
182 
183     // Make a test device
184     static std::shared_ptr<Device> forTest_makeDriverDevice(const std::string& name,
185                                                             const sp<hal::V1_0::IDevice>& device);
186 
forTest_isCpuDevice(const ANeuralNetworksDevice * device)187     bool forTest_isCpuDevice(const ANeuralNetworksDevice* device) const {
188         return reinterpret_cast<const Device*>(device) == getCpuDevice().get();
189     }
190 
191    private:
192     // Builds the list of available drivers and queries their capabilities.
193     DeviceManager();
194 
195     // Adds a device for the manager to use.
196     void registerDevice(const std::string& name, const hal::DeviceFactory& makeDevice);
197 
198     void findAvailableDevices();
199 
200     // List of all the devices we discovered (including CpuDevice).
201     std::vector<std::shared_ptr<Device>> mDevices;
202 
203     // We set this one to have CpuDevice only. To be used when m*CpuOnly is true.
204     std::vector<std::shared_ptr<Device>> mDevicesCpuOnly;
205 
206     // If either of these is true, we'll ignore the drivers that are
207     // on the device and run everything on the CPU.
208     bool mSetCpuOnly = false;      // set by setUseCpuOnly()
209     bool mDebugNNCpuOnly = false;  // derived from system property debug.nn.cpuonly
210 
211     // synchronous execution
212     bool mSyncExecCpu = true;
213     bool mSyncExecHal = true;         // Call executeSynchronously*() when available on device.
214     bool mSyncExecHalSetter = false;  // Has mSyncExecHal been set by setSyncExecHal()?
215                                       // If so, don't allow the setting to be overridden
216                                       //     by system property debug.nn.syncexec-hal
217     bool mSyncExecRuntime = false;
218 
219     static const uint32_t kPartitioningDefault = kPartitioningWithFallback;
220     uint32_t mPartitioning = kPartitioningDefault;
221 
222     bool mStrictSlicing = false;
223 };
224 
225 }  // namespace nn
226 }  // namespace android
227 
228 #endif  // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H
229