/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H

#include <android-base/macros.h>

#include <map>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <unordered_set>
#include <utility>
#include <vector>

#include "Callbacks.h"
#include "HalInterfaces.h"
#include "Memory.h"
#include "Utils.h"

namespace android {
namespace nn {

// Forward declarations
class Device;
class ExecutionBurstController;
class MetaModel;
class ModelArgumentInfo;
class VersionedIPreparedModel;

// A unified interface for an actual driver prepared model as well as the CPU.
class PreparedModel {
    DISALLOW_COPY_AND_ASSIGN(PreparedModel);

   public:
    PreparedModel() = default;
    virtual ~PreparedModel() = default;

    virtual const Device* getDevice() const = 0;
    virtual std::shared_ptr<VersionedIPreparedModel> getInterface() const = 0;

    // Perform a computation with the given input/output argument info and memory pools.
    virtual std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> execute(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const Memory*>& memories,
            const std::shared_ptr<ExecutionBurstController>& burstController,
            hal::MeasureTiming measure, const std::optional<Deadline>& deadline,
            const hal::OptionalTimeoutDuration& loopTimeoutDuration) const = 0;
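
    // Illustrative sketch (not part of this interface): how a caller holding a
    // PreparedModel might drive a plain synchronous execution. The argument
    // vectors are assumed to have been populated by the caller (e.g. the
    // runtime's ExecutionBuilder); error handling is elided.
    //
    //     const auto [n, outputShapes, timing] = preparedModel->execute(
    //             inputs, outputs, memories, /*burstController=*/nullptr,
    //             hal::MeasureTiming::NO, /*deadline=*/std::nullopt,
    //             /*loopTimeoutDuration=*/{});
    //     if (n != ANEURALNETWORKS_NO_ERROR) {
    //         // Execution failed; outputShapes may still report output sizes.
    //     }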
    // Perform a fenced computation with the given input/output argument info and memory pools.
    // The returned timing information is only valid if the callback is nullptr.
    // Returns error_code, sync_fence, callback, and timing.
    virtual std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const Memory*>& memories, const std::vector<int>& waitFor,
            hal::MeasureTiming measure, const std::optional<Deadline>& deadline,
            const hal::OptionalTimeoutDuration& loopTimeoutDuration,
            const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const = 0;

    virtual std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
            bool preferPowerOverLatency) const = 0;
};

// A unified interface for actual driver devices as well as the CPU.
class Device {
    DISALLOW_COPY_AND_ASSIGN(Device);

   public:
    Device() = default;
    virtual ~Device() = default;

    // Introspection methods returning device information.
    virtual const std::string& getName() const = 0;
    virtual const std::string& getVersionString() const = 0;
    virtual int64_t getFeatureLevel() const = 0;
    virtual int32_t getType() const = 0;
    virtual const std::vector<hal::Extension>& getSupportedExtensions() const = 0;

    // See the MetaModel class in MetaModel.h for more details.
    virtual std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const = 0;

    virtual hal::PerformanceInfo getPerformance(hal::OperandType type) const = 0;
    virtual hal::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const = 0;
    virtual hal::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const = 0;
    virtual hal::PerformanceInfo getIfPerformance() const = 0;
    virtual hal::PerformanceInfo getWhilePerformance() const = 0;
    virtual bool isCachingSupported() const = 0;
    virtual int wait() const = 0;

    virtual std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
            const hal::ModelFactory& makeModel, hal::ExecutionPreference preference,
            hal::Priority priority, const std::optional<Deadline>& deadline,
            const std::string& cacheDir,
            const std::optional<hal::CacheToken>& maybeToken) const = 0;

    // The caller is responsible for making sure the MemoryDescriptor only contains
    // PreparedModels from the same Device.
    virtual std::pair<int, std::unique_ptr<Memory>> allocate(const MemoryDescriptor& desc,
                                                             hal::OperandType type) const = 0;
};

// Manages the NN HAL devices. Only one instance of this class will exist.
// Use get() to retrieve it.
class DeviceManager {
   public:
    const std::vector<std::shared_ptr<Device>>& getDrivers() const {
        if (mSetCpuOnly || mDebugNNCpuOnly) {
            return mDevicesCpuOnly;
        }
        return mDevices;
    }

    // For testing only:
    void setUseCpuOnly(bool useCpuOnly) { mSetCpuOnly = useCpuOnly; }
    bool getUseCpuOnly() const { return mSetCpuOnly; }
    void setSyncExecHal(bool val) {
        mSyncExecHal = val;
        mSyncExecHalSetter = true;
    }

    bool syncExecCpu() const { return mSyncExecCpu; }
    bool syncExecHal() const { return mSyncExecHal; }
    bool syncExecRuntime() const { return mSyncExecRuntime; }

    // How to handle graph partitioning?
    //   0 - Don't do graph partitioning.
    //   1 - Do graph partitioning; but fall back to non-partitioned
    //       execution if there is a partitioning failure.
    //   2 - Do graph partitioning, and rely on it; there is no fallback.
    enum { kPartitioningNo = 0, kPartitioningWithFallback = 1, kPartitioningWithoutFallback = 2 };
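
    // Illustrative sketch (not part of this interface): how a caller is expected
    // to interpret the partitioning setting. compileModelAcrossDevices() is a
    // hypothetical stand-in for the runtime's partitioned compilation path.
    //
    //     const uint32_t partitioning = DeviceManager::get()->getPartitioning();
    //     if (partitioning != kPartitioningNo) {
    //         const int n = compileModelAcrossDevices();
    //         if (n == ANEURALNETWORKS_NO_ERROR ||
    //             !DeviceManager::partitioningAllowsFallback(partitioning)) {
    //             return n;
    //         }
    //         // Otherwise fall through to non-partitioned compilation.
    //     }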
    uint32_t getPartitioning() const { return mPartitioning; }
    static bool partitioningAllowsFallback(uint32_t partitioning) {
        return partitioning == kPartitioningWithFallback;
    }

    bool strictSlicing() const { return mStrictSlicing; }

    // Returns the singleton manager.
    static DeviceManager* get();

    // Returns the singleton Cpu device.
    static std::shared_ptr<Device> getCpuDevice();

    // The forTest_* functions below are solely intended for use by unit tests.

    // Returns all devices (ignores the cpu-only flags).
    std::vector<std::shared_ptr<Device>> forTest_getDevices() const { return mDevices; }

    // Sets the device list (does not affect cpu-only queries).
    void forTest_setDevices(std::vector<std::shared_ptr<Device>> devices) {
        mDevices = std::move(devices);
    }

    // Register a test device.
    void forTest_registerDevice(const std::string& name, const sp<hal::V1_0::IDevice>& device) {
        const hal::DeviceFactory makeDevice = [device](bool /*blocking*/) { return device; };
        registerDevice(name, makeDevice);
    }

    // Re-initialize the list of available devices.
    void forTest_reInitializeDeviceList() {
        mDevices.clear();
        mDevicesCpuOnly.clear();
        findAvailableDevices();
    }

    // Make a test device.
    static std::shared_ptr<Device> forTest_makeDriverDevice(const std::string& name,
                                                            const sp<hal::V1_0::IDevice>& device);

    bool forTest_isCpuDevice(const ANeuralNetworksDevice* device) const {
        return reinterpret_cast<const Device*>(device) == getCpuDevice().get();
    }

   private:
    // Builds the list of available drivers and queries their capabilities.
    DeviceManager();

    // Adds a device for the manager to use.
    void registerDevice(const std::string& name, const hal::DeviceFactory& makeDevice);

    void findAvailableDevices();

    // List of all the devices we discovered (including CpuDevice).
    std::vector<std::shared_ptr<Device>> mDevices;

    // Holds only CpuDevice; returned by getDrivers() when one of the
    // cpu-only flags below is set.
    std::vector<std::shared_ptr<Device>> mDevicesCpuOnly;

    // If either of these is true, we'll ignore the drivers that are
    // on the device and run everything on the CPU.
    bool mSetCpuOnly = false;      // set by setUseCpuOnly()
    bool mDebugNNCpuOnly = false;  // derived from the system property debug.nn.cpuonly

    // synchronous execution
    bool mSyncExecCpu = true;
    bool mSyncExecHal = true;         // Call executeSynchronously*() when available on the device.
    bool mSyncExecHalSetter = false;  // Has mSyncExecHal been set by setSyncExecHal()?
                                      // If so, don't allow the setting to be overridden
                                      // by the system property debug.nn.syncexec-hal.
    bool mSyncExecRuntime = false;

    static const uint32_t kPartitioningDefault = kPartitioningWithFallback;
    uint32_t mPartitioning = kPartitioningDefault;

    bool mStrictSlicing = false;
};

}  // namespace nn
}  // namespace android

#endif  // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H