1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
18 #define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
19 
#include <android-base/macros.h>
#include <android/hardware/neuralnetworks/1.0/types.h>
#include <android/hardware/neuralnetworks/1.1/types.h>
#include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>
#include <android/hardware/neuralnetworks/1.2/IBurstContext.h>
#include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>
#include <android/hardware/neuralnetworks/1.2/types.h>
#include <fmq/MessageQueue.h>
#include <hidl/MQDescriptor.h>

#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <stack>
#include <tuple>
#include <utility>
#include <vector>
39 
40 namespace android::nn {
41 
/**
 * Length of the FMQ, expressed as a number of elements.
 */
constexpr size_t kExecutionBurstChannelLength{1024};
46 
47 /**
48  * Function to serialize a request.
49  *
50  * Prefer calling RequestChannelSender::send.
51  *
52  * @param request Request object without the pool information.
53  * @param measure Whether to collect timing information for the execution.
54  * @param memoryIds Slot identifiers corresponding to memory resources for the
55  *     request.
56  * @return Serialized FMQ request data.
57  */
58 std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum> serialize(
59         const hardware::neuralnetworks::V1_0::Request& request,
60         hardware::neuralnetworks::V1_2::MeasureTiming measure, const std::vector<int32_t>& slots);
61 
62 /**
63  * Deserialize the FMQ result data.
64  *
65  * The three resulting fields are the status of the execution, the dynamic
66  * shapes of the output tensors, and the timing information of the execution.
67  *
68  * @param data Serialized FMQ result data.
69  * @return Result object if successfully deserialized, std::nullopt otherwise.
70  */
71 std::optional<std::tuple<hardware::neuralnetworks::V1_0::ErrorStatus,
72                          std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
73                          hardware::neuralnetworks::V1_2::Timing>>
74 deserialize(const std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum>& data);
75 
76 /**
77  * Convert result code to error status.
78  *
79  * @param resultCode Result code to be converted.
80  * @return ErrorStatus Resultant error status.
81  */
82 hardware::neuralnetworks::V1_0::ErrorStatus legacyConvertResultCodeToErrorStatus(int resultCode);
83 
84 /**
85  * ResultChannelReceiver is responsible for waiting on the channel until the
86  * packet is available, extracting the packet from the channel, and
87  * deserializing the packet.
88  *
89  * Because the receiver can wait on a packet that may never come (e.g., because
90  * the sending side of the packet has been closed), this object can be
91  * invalidated, unblocking the receiver.
92  */
93 class ResultChannelReceiver {
94     using FmqResultDescriptor =
95             hardware::MQDescriptorSync<hardware::neuralnetworks::V1_2::FmqResultDatum>;
96     using FmqResultChannel = hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqResultDatum,
97                                                     hardware::kSynchronizedReadWrite>;
98 
99    public:
100     /**
101      * Create the receiving end of a result channel.
102      *
103      * Prefer this call over the constructor.
104      *
105      * @param channelLength Number of elements in the FMQ.
106      * @param pollingTimeWindow How much time (in microseconds) the
107      *     ResultChannelReceiver is allowed to poll the FMQ before waiting on
108      *     the blocking futex. Polling may result in lower latencies at the
109      *     potential cost of more power usage.
110      * @return A pair of ResultChannelReceiver and the FMQ descriptor on
111      *     successful creation, both nullptr otherwise.
112      */
113     static std::pair<std::unique_ptr<ResultChannelReceiver>, const FmqResultDescriptor*> create(
114             size_t channelLength, std::chrono::microseconds pollingTimeWindow);
115 
116     /**
117      * Get the result from the channel.
118      *
119      * This method will block until either:
120      * 1) The packet has been retrieved, or
121      * 2) The receiver has been invalidated
122      *
123      * @return Result object if successfully received, std::nullopt if error or
124      *     if the receiver object was invalidated.
125      */
126     std::optional<std::tuple<hardware::neuralnetworks::V1_0::ErrorStatus,
127                              std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
128                              hardware::neuralnetworks::V1_2::Timing>>
129     getBlocking();
130 
131     /**
132      * Method to mark the channel as invalid, unblocking any current or future
133      * calls to ResultChannelReceiver::getBlocking.
134      */
135     void invalidate();
136 
137     // prefer calling ResultChannelReceiver::getBlocking
138     std::optional<std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum>> getPacketBlocking();
139 
140     ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel,
141                           std::chrono::microseconds pollingTimeWindow);
142 
143    private:
144     const std::unique_ptr<FmqResultChannel> mFmqResultChannel;
145     std::atomic<bool> mValid{true};
146     const std::chrono::microseconds kPollingTimeWindow;
147 };
148 
149 /**
150  * RequestChannelSender is responsible for serializing the result packet of
151  * information, sending it on the result channel, and signaling that the data is
152  * available.
153  */
154 class RequestChannelSender {
155     using FmqRequestDescriptor =
156             hardware::MQDescriptorSync<hardware::neuralnetworks::V1_2::FmqRequestDatum>;
157     using FmqRequestChannel =
158             hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqRequestDatum,
159                                    hardware::kSynchronizedReadWrite>;
160 
161    public:
162     /**
163      * Create the sending end of a request channel.
164      *
165      * Prefer this call over the constructor.
166      *
167      * @param channelLength Number of elements in the FMQ.
168      * @return A pair of ResultChannelReceiver and the FMQ descriptor on
169      *     successful creation, both nullptr otherwise.
170      */
171     static std::pair<std::unique_ptr<RequestChannelSender>, const FmqRequestDescriptor*> create(
172             size_t channelLength);
173 
174     /**
175      * Send the request to the channel.
176      *
177      * @param request Request object without the pool information.
178      * @param measure Whether to collect timing information for the execution.
179      * @param memoryIds Slot identifiers corresponding to memory resources for
180      *     the request.
181      * @return 'true' on successful send, 'false' otherwise.
182      */
183     bool send(const hardware::neuralnetworks::V1_0::Request& request,
184               hardware::neuralnetworks::V1_2::MeasureTiming measure,
185               const std::vector<int32_t>& slots);
186 
187     /**
188      * Method to mark the channel as invalid, causing all future calls to
189      * RequestChannelSender::send to immediately return false without attempting
190      * to send a message across the FMQ.
191      */
192     void invalidate();
193 
194     // prefer calling RequestChannelSender::send
195     bool sendPacket(const std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum>& packet);
196 
197     RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel);
198 
199    private:
200     const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel;
201     std::atomic<bool> mValid{true};
202 };
203 
204 /**
205  * The ExecutionBurstController class manages both the serialization and
206  * deserialization of data across FMQ, making it appear to the runtime as a
207  * regular synchronous inference. Additionally, this class manages the burst's
208  * memory cache.
209  */
210 class ExecutionBurstController {
211     DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstController);
212 
213    public:
214     /**
215      * NN runtime burst callback object and memory cache.
216      *
217      * ExecutionBurstCallback associates a hidl_memory object with a slot number
218      * to be passed across FMQ. The ExecutionBurstServer can use this callback
219      * to retrieve this hidl_memory corresponding to the slot via HIDL.
220      *
221      * Whenever a hidl_memory object is copied, it will duplicate the underlying
222      * file descriptor. Because the NN runtime currently copies the hidl_memory
223      * on each execution, it is difficult to associate hidl_memory objects with
224      * previously cached hidl_memory objects. For this reason, callers of this
225      * class must pair each hidl_memory object with an associated key. For
226      * efficiency, if two hidl_memory objects represent the same underlying
227      * buffer, they must use the same key.
228      */
229     class ExecutionBurstCallback : public hardware::neuralnetworks::V1_2::IBurstCallback {
230         DISALLOW_COPY_AND_ASSIGN(ExecutionBurstCallback);
231 
232        public:
233         ExecutionBurstCallback() = default;
234 
235         hardware::Return<void> getMemories(const hardware::hidl_vec<int32_t>& slots,
236                                            getMemories_cb cb) override;
237 
238         /**
239          * This function performs one of two different actions:
240          * 1) If a key corresponding to a memory resource is unrecognized by the
241          *    ExecutionBurstCallback object, the ExecutionBurstCallback object
242          *    will allocate a slot, bind the memory to the slot, and return the
243          *    slot identifier.
244          * 2) If a key corresponding to a memory resource is recognized by the
245          *    ExecutionBurstCallback object, the ExecutionBurstCallback object
246          *    will return the existing slot identifier.
247          *
248          * @param memories Memory resources used in an inference.
249          * @param keys Unique identifiers where each element corresponds to a
250          *     memory resource element in "memories".
251          * @return Unique slot identifiers where each returned slot element
252          *     corresponds to a memory resource element in "memories".
253          */
254         std::vector<int32_t> getSlots(const hardware::hidl_vec<hardware::hidl_memory>& memories,
255                                       const std::vector<intptr_t>& keys);
256 
257         /*
258          * This function performs two different actions:
259          * 1) Removes an entry from the cache (if present), including the local
260          *    storage of the hidl_memory object. Note that this call does not
261          *    free any corresponding hidl_memory object in ExecutionBurstServer,
262          *    which is separately freed via IBurstContext::freeMemory.
263          * 2) Return whether a cache entry was removed and which slot was removed if
264          *    found. If the key did not to correspond to any entry in the cache, a
265          *    slot number of 0 is returned. The slot number and whether the entry
266          *    existed is useful so the same slot can be freed in the
267          *    ExecutionBurstServer's cache via IBurstContext::freeMemory.
268          */
269         std::pair<bool, int32_t> freeMemory(intptr_t key);
270 
271        private:
272         int32_t getSlotLocked(const hardware::hidl_memory& memory, intptr_t key);
273         int32_t allocateSlotLocked();
274 
275         std::mutex mMutex;
276         std::stack<int32_t, std::vector<int32_t>> mFreeSlots;
277         std::map<intptr_t, int32_t> mMemoryIdToSlot;
278         std::vector<hardware::hidl_memory> mMemoryCache;
279     };
280 
281     /**
282      * Creates a burst controller on a prepared model.
283      *
284      * Prefer this over ExecutionBurstController's constructor.
285      *
286      * @param preparedModel Model prepared for execution to execute on.
287      * @param pollingTimeWindow How much time (in microseconds) the
288      *     ExecutionBurstController is allowed to poll the FMQ before waiting on
289      *     the blocking futex. Polling may result in lower latencies at the
290      *     potential cost of more power usage.
291      * @return ExecutionBurstController Execution burst controller object.
292      */
293     static std::unique_ptr<ExecutionBurstController> create(
294             const sp<hardware::neuralnetworks::V1_2::IPreparedModel>& preparedModel,
295             std::chrono::microseconds pollingTimeWindow);
296 
297     // prefer calling ExecutionBurstController::create
298     ExecutionBurstController(const std::shared_ptr<RequestChannelSender>& requestChannelSender,
299                              const std::shared_ptr<ResultChannelReceiver>& resultChannelReceiver,
300                              const sp<hardware::neuralnetworks::V1_2::IBurstContext>& burstContext,
301                              const sp<ExecutionBurstCallback>& callback,
302                              const sp<hardware::hidl_death_recipient>& deathHandler = nullptr);
303 
304     // explicit destructor to unregister the death recipient
305     ~ExecutionBurstController();
306 
307     /**
308      * Execute a request on a model.
309      *
310      * @param request Arguments to be executed on a model.
311      * @param measure Whether to collect timing measurements, either YES or NO
312      * @param memoryIds Identifiers corresponding to each memory object in the
313      *     request's pools.
314      * @return A tuple of:
315      *     - result code of the execution
316      *     - dynamic output shapes from the execution
317      *     - any execution time measurements of the execution
318      *     - whether or not a failed burst execution should be re-run using a
319      *       different path (e.g., IPreparedModel::executeSynchronously)
320      */
321     std::tuple<int, std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
322                hardware::neuralnetworks::V1_2::Timing, bool>
323     compute(const hardware::neuralnetworks::V1_0::Request& request,
324             hardware::neuralnetworks::V1_2::MeasureTiming measure,
325             const std::vector<intptr_t>& memoryIds);
326 
327     /**
328      * Propagate a user's freeing of memory to the service.
329      *
330      * @param key Key corresponding to the memory object.
331      */
332     void freeMemory(intptr_t key);
333 
334    private:
335     std::mutex mMutex;
336     const std::shared_ptr<RequestChannelSender> mRequestChannelSender;
337     const std::shared_ptr<ResultChannelReceiver> mResultChannelReceiver;
338     const sp<hardware::neuralnetworks::V1_2::IBurstContext> mBurstContext;
339     const sp<ExecutionBurstCallback> mMemoryCache;
340     const sp<hardware::hidl_death_recipient> mDeathHandler;
341 };
342 
343 }  // namespace android::nn
344 
345 #endif  // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
346