/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
#define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H

#include <android-base/macros.h>
#include <android/hardware/neuralnetworks/1.0/types.h>
#include <android/hardware/neuralnetworks/1.1/types.h>
#include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>
#include <android/hardware/neuralnetworks/1.2/IBurstContext.h>
#include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>
#include <android/hardware/neuralnetworks/1.2/types.h>
#include <fmq/MessageQueue.h>
#include <hidl/MQDescriptor.h>

#include <atomic>
#include <chrono>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <stack>
#include <tuple>
#include <utility>
#include <vector>

namespace android::nn {

/**
 * Number of elements in the FMQ.
 */
constexpr const size_t kExecutionBurstChannelLength = 1024;

/**
 * Function to serialize a request.
 *
 * Prefer calling RequestChannelSender::send.
 *
 * @param request Request object without the pool information.
 * @param measure Whether to collect timing information for the execution.
 * @param slots Slot identifiers corresponding to memory resources for the
 *     request.
 * @return Serialized FMQ request data.
 */
std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum> serialize(
        const hardware::neuralnetworks::V1_0::Request& request,
        hardware::neuralnetworks::V1_2::MeasureTiming measure, const std::vector<int32_t>& slots);

/**
 * Deserialize the FMQ result data.
 *
 * The three resulting fields are the status of the execution, the dynamic
 * shapes of the output tensors, and the timing information of the execution.
 *
 * @param data Serialized FMQ result data.
 * @return Result object if successfully deserialized, std::nullopt otherwise.
 */
std::optional<std::tuple<hardware::neuralnetworks::V1_0::ErrorStatus,
                         std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
                         hardware::neuralnetworks::V1_2::Timing>>
deserialize(const std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum>& data);

/**
 * Convert result code to error status.
 *
 * @param resultCode Result code to be converted.
 * @return ErrorStatus Resultant error status.
 */
hardware::neuralnetworks::V1_0::ErrorStatus legacyConvertResultCodeToErrorStatus(int resultCode);

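/*
 * Illustrative sketch of how the free functions above fit together. This is
 * not part of the header's interface; "request", "slots", and "resultData" are
 * assumed to come from the caller, and V1_2 abbreviates
 * hardware::neuralnetworks::V1_2.
 *
 *     // Serialize a V1_0::Request and its slot identifiers into FMQ data.
 *     std::vector<V1_2::FmqRequestDatum> packet =
 *             serialize(request, V1_2::MeasureTiming::NO, slots);
 *
 *     // On the receiving side, raw FMQ result data is turned back into a
 *     // (status, output shapes, timing) tuple; std::nullopt signals a
 *     // malformed packet.
 *     if (auto result = deserialize(resultData)) {
 *         const auto& [status, outputShapes, timing] = *result;
 *         // ...
 *     }
 */
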
/**
 * ResultChannelReceiver is responsible for waiting on the channel until the
 * packet is available, extracting the packet from the channel, and
 * deserializing the packet.
 *
 * Because the receiver can wait on a packet that may never come (e.g., because
 * the sending side of the packet has been closed), this object can be
 * invalidated, unblocking the receiver.
 */
class ResultChannelReceiver {
    using FmqResultDescriptor =
            hardware::MQDescriptorSync<hardware::neuralnetworks::V1_2::FmqResultDatum>;
    using FmqResultChannel = hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqResultDatum,
                                                    hardware::kSynchronizedReadWrite>;

   public:
    /**
     * Create the receiving end of a result channel.
     *
     * Prefer this call over the constructor.
     *
     * @param channelLength Number of elements in the FMQ.
     * @param pollingTimeWindow How much time (in microseconds) the
     *     ResultChannelReceiver is allowed to poll the FMQ before waiting on
     *     the blocking futex. Polling may result in lower latencies at the
     *     potential cost of more power usage.
     * @return A pair of ResultChannelReceiver and the FMQ descriptor on
     *     successful creation, both nullptr otherwise.
     */
    static std::pair<std::unique_ptr<ResultChannelReceiver>, const FmqResultDescriptor*> create(
            size_t channelLength, std::chrono::microseconds pollingTimeWindow);

    /**
     * Get the result from the channel.
     *
     * This method will block until either:
     * 1) The packet has been retrieved, or
     * 2) The receiver has been invalidated
     *
     * @return Result object if successfully received, std::nullopt if error or
     *     if the receiver object was invalidated.
     */
    std::optional<std::tuple<hardware::neuralnetworks::V1_0::ErrorStatus,
                             std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
                             hardware::neuralnetworks::V1_2::Timing>>
    getBlocking();

    /**
     * Method to mark the channel as invalid, unblocking any current or future
     * calls to ResultChannelReceiver::getBlocking.
     */
    void invalidate();

    // prefer calling ResultChannelReceiver::getBlocking
    std::optional<std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum>> getPacketBlocking();

    ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel,
                          std::chrono::microseconds pollingTimeWindow);

   private:
    const std::unique_ptr<FmqResultChannel> mFmqResultChannel;
    std::atomic<bool> mValid{true};
    const std::chrono::microseconds kPollingTimeWindow;
};

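/*
 * Minimal usage sketch for ResultChannelReceiver. Illustrative only; the
 * polling window below is an arbitrary example value, and error handling is
 * elided.
 *
 *     auto [receiver, resultDescriptor] = ResultChannelReceiver::create(
 *             kExecutionBurstChannelLength, std::chrono::microseconds{50});
 *     if (receiver == nullptr) { /* handle creation failure *\/ }
 *
 *     // Blocks until a packet arrives or the receiver is invalidated.
 *     if (auto result = receiver->getBlocking()) {
 *         const auto& [status, outputShapes, timing] = *result;
 *         // ...
 *     }
 */
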
/**
 * RequestChannelSender is responsible for serializing the request packet of
 * information, sending it on the request channel, and signaling that the data
 * is available.
 */
class RequestChannelSender {
    using FmqRequestDescriptor =
            hardware::MQDescriptorSync<hardware::neuralnetworks::V1_2::FmqRequestDatum>;
    using FmqRequestChannel =
            hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqRequestDatum,
                                   hardware::kSynchronizedReadWrite>;

   public:
    /**
     * Create the sending end of a request channel.
     *
     * Prefer this call over the constructor.
     *
     * @param channelLength Number of elements in the FMQ.
     * @return A pair of RequestChannelSender and the FMQ descriptor on
     *     successful creation, both nullptr otherwise.
     */
    static std::pair<std::unique_ptr<RequestChannelSender>, const FmqRequestDescriptor*> create(
            size_t channelLength);

    /**
     * Send the request to the channel.
     *
     * @param request Request object without the pool information.
     * @param measure Whether to collect timing information for the execution.
     * @param slots Slot identifiers corresponding to memory resources for the
     *     request.
     * @return 'true' on successful send, 'false' otherwise.
     */
    bool send(const hardware::neuralnetworks::V1_0::Request& request,
              hardware::neuralnetworks::V1_2::MeasureTiming measure,
              const std::vector<int32_t>& slots);

    /**
     * Method to mark the channel as invalid, causing all future calls to
     * RequestChannelSender::send to immediately return false without attempting
     * to send a message across the FMQ.
     */
    void invalidate();

    // prefer calling RequestChannelSender::send
    bool sendPacket(const std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum>& packet);

    RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel);

   private:
    const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel;
    std::atomic<bool> mValid{true};
};

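/*
 * Minimal usage sketch for RequestChannelSender. Illustrative only; "request"
 * and "slots" are assumed to come from the caller, and V1_2 abbreviates
 * hardware::neuralnetworks::V1_2.
 *
 *     auto [sender, requestDescriptor] =
 *             RequestChannelSender::create(kExecutionBurstChannelLength);
 *     if (sender == nullptr) { /* handle creation failure *\/ }
 *
 *     // Serializes the request and signals the service that data is available.
 *     const bool success = sender->send(request, V1_2::MeasureTiming::YES, slots);
 */
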
/**
 * The ExecutionBurstController class manages both the serialization and
 * deserialization of data across FMQ, making it appear to the runtime as a
 * regular synchronous inference. Additionally, this class manages the burst's
 * memory cache.
 */
class ExecutionBurstController {
    DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstController);

   public:
    /**
     * NN runtime burst callback object and memory cache.
     *
     * ExecutionBurstCallback associates a hidl_memory object with a slot number
     * to be passed across FMQ. The ExecutionBurstServer can use this callback
     * to retrieve this hidl_memory corresponding to the slot via HIDL.
     *
     * Whenever a hidl_memory object is copied, it will duplicate the underlying
     * file descriptor. Because the NN runtime currently copies the hidl_memory
     * on each execution, it is difficult to associate hidl_memory objects with
     * previously cached hidl_memory objects. For this reason, callers of this
     * class must pair each hidl_memory object with an associated key. For
     * efficiency, if two hidl_memory objects represent the same underlying
     * buffer, they must use the same key.
     */
    class ExecutionBurstCallback : public hardware::neuralnetworks::V1_2::IBurstCallback {
        DISALLOW_COPY_AND_ASSIGN(ExecutionBurstCallback);

       public:
        ExecutionBurstCallback() = default;

        hardware::Return<void> getMemories(const hardware::hidl_vec<int32_t>& slots,
                                           getMemories_cb cb) override;

        /**
         * This function performs one of two different actions:
         * 1) If a key corresponding to a memory resource is unrecognized by the
         *    ExecutionBurstCallback object, the ExecutionBurstCallback object
         *    will allocate a slot, bind the memory to the slot, and return the
         *    slot identifier.
         * 2) If a key corresponding to a memory resource is recognized by the
         *    ExecutionBurstCallback object, the ExecutionBurstCallback object
         *    will return the existing slot identifier.
         *
         * @param memories Memory resources used in an inference.
         * @param keys Unique identifiers where each element corresponds to a
         *     memory resource element in "memories".
         * @return Unique slot identifiers where each returned slot element
         *     corresponds to a memory resource element in "memories".
         */
        std::vector<int32_t> getSlots(const hardware::hidl_vec<hardware::hidl_memory>& memories,
                                      const std::vector<intptr_t>& keys);

        /*
         * This function performs two different actions:
         * 1) Removes an entry from the cache (if present), including the local
         *    storage of the hidl_memory object. Note that this call does not
         *    free any corresponding hidl_memory object in ExecutionBurstServer,
         *    which is separately freed via IBurstContext::freeMemory.
         * 2) Returns whether a cache entry was removed and, if so, which slot
         *    was removed. If the key did not correspond to any entry in the
         *    cache, a slot number of 0 is returned. The slot number and whether
         *    the entry existed is useful so the same slot can be freed in the
         *    ExecutionBurstServer's cache via IBurstContext::freeMemory.
         */
        std::pair<bool, int32_t> freeMemory(intptr_t key);

       private:
        int32_t getSlotLocked(const hardware::hidl_memory& memory, intptr_t key);
        int32_t allocateSlotLocked();

        std::mutex mMutex;
        std::stack<int32_t, std::vector<int32_t>> mFreeSlots;
        std::map<intptr_t, int32_t> mMemoryIdToSlot;
        std::vector<hardware::hidl_memory> mMemoryCache;
    };

    /**
     * Creates a burst controller on a prepared model.
     *
     * Prefer this over ExecutionBurstController's constructor.
     *
     * @param preparedModel Model prepared for execution to execute on.
     * @param pollingTimeWindow How much time (in microseconds) the
     *     ExecutionBurstController is allowed to poll the FMQ before waiting on
     *     the blocking futex. Polling may result in lower latencies at the
     *     potential cost of more power usage.
     * @return ExecutionBurstController Execution burst controller object.
     */
    static std::unique_ptr<ExecutionBurstController> create(
            const sp<hardware::neuralnetworks::V1_2::IPreparedModel>& preparedModel,
            std::chrono::microseconds pollingTimeWindow);

    // prefer calling ExecutionBurstController::create
    ExecutionBurstController(const std::shared_ptr<RequestChannelSender>& requestChannelSender,
                             const std::shared_ptr<ResultChannelReceiver>& resultChannelReceiver,
                             const sp<hardware::neuralnetworks::V1_2::IBurstContext>& burstContext,
                             const sp<ExecutionBurstCallback>& callback,
                             const sp<hardware::hidl_death_recipient>& deathHandler = nullptr);

    // explicit destructor to unregister the death recipient
    ~ExecutionBurstController();

    /**
     * Execute a request on a model.
     *
     * @param request Arguments to be executed on a model.
     * @param measure Whether to collect timing measurements, either YES or NO
     * @param memoryIds Identifiers corresponding to each memory object in the
     *     request's pools.
     * @return A tuple of:
     *     - result code of the execution
     *     - dynamic output shapes from the execution
     *     - any execution time measurements of the execution
     *     - whether or not a failed burst execution should be re-run using a
     *       different path (e.g., IPreparedModel::executeSynchronously)
     */
    std::tuple<int, std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
               hardware::neuralnetworks::V1_2::Timing, bool>
    compute(const hardware::neuralnetworks::V1_0::Request& request,
            hardware::neuralnetworks::V1_2::MeasureTiming measure,
            const std::vector<intptr_t>& memoryIds);

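    /*
     * Illustrative usage sketch for create/compute. Not part of the class
     * interface; "preparedModel", "request", and "memoryIds" are assumed to
     * come from the runtime, the polling window is an arbitrary example value,
     * and V1_2 abbreviates hardware::neuralnetworks::V1_2.
     *
     *     auto controller = ExecutionBurstController::create(
     *             preparedModel, std::chrono::microseconds{50});
     *     if (controller == nullptr) { /* fall back to a non-burst path *\/ }
     *
     *     const auto [n, outputShapes, timing, fallback] =
     *             controller->compute(request, V1_2::MeasureTiming::NO, memoryIds);
     *     if (n != ANEURALNETWORKS_NO_ERROR && fallback) {
     *         // Re-run the execution via IPreparedModel::executeSynchronously.
     *     }
     */
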
    /**
     * Propagate a user's freeing of memory to the service.
     *
     * @param key Key corresponding to the memory object.
     */
    void freeMemory(intptr_t key);

   private:
    std::mutex mMutex;
    const std::shared_ptr<RequestChannelSender> mRequestChannelSender;
    const std::shared_ptr<ResultChannelReceiver> mResultChannelReceiver;
    const sp<hardware::neuralnetworks::V1_2::IBurstContext> mBurstContext;
    const sp<ExecutionBurstCallback> mMemoryCache;
    const sp<hardware::hidl_death_recipient> mDeathHandler;
};

}  // namespace android::nn

#endif  // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H