1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H
18 #define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H
19 
#include <android-base/macros.h>
#include <android/hardware/neuralnetworks/1.0/types.h>
#include <android/hardware/neuralnetworks/1.1/types.h>
#include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>
#include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>
#include <android/hardware/neuralnetworks/1.2/types.h>
#include <fmq/MessageQueue.h>
#include <hidl/MQDescriptor.h>

#include <atomic>
#include <chrono>
#include <cstdint>
#include <memory>
#include <mutex>
#include <optional>
#include <thread>
#include <tuple>
#include <vector>
36 
37 namespace android::nn {
38 
39 using FmqRequestDescriptor =
40         hardware::MQDescriptorSync<hardware::neuralnetworks::V1_2::FmqRequestDatum>;
41 using FmqResultDescriptor =
42         hardware::MQDescriptorSync<hardware::neuralnetworks::V1_2::FmqResultDatum>;
43 
44 /**
45  * Function to serialize results.
46  *
47  * Prefer calling ResultChannelSender::send.
48  *
49  * @param errorStatus Status of the execution.
50  * @param outputShapes Dynamic shapes of the output tensors.
51  * @param timing Timing information of the execution.
52  * @return Serialized FMQ result data.
53  */
54 std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum> serialize(
55         hardware::neuralnetworks::V1_0::ErrorStatus errorStatus,
56         const std::vector<hardware::neuralnetworks::V1_2::OutputShape>& outputShapes,
57         hardware::neuralnetworks::V1_2::Timing timing);
58 
59 /**
60  * Deserialize the FMQ request data.
61  *
62  * The three resulting fields are the Request object (where Request::pools is
63  * empty), slot identifiers (which are stand-ins for Request::pools), and
64  * whether timing information must be collected for the run.
65  *
66  * @param data Serialized FMQ request data.
67  * @return Request object if successfully deserialized, std::nullopt otherwise.
68  */
69 std::optional<std::tuple<hardware::neuralnetworks::V1_0::Request, std::vector<int32_t>,
70                          hardware::neuralnetworks::V1_2::MeasureTiming>>
71 deserialize(const std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum>& data);
72 
73 /**
74  * RequestChannelReceiver is responsible for waiting on the channel until the
75  * packet is available, extracting the packet from the channel, and
76  * deserializing the packet.
77  *
78  * Because the receiver can wait on a packet that may never come (e.g., because
79  * the sending side of the packet has been closed), this object can be
80  * invalidated, unblocking the receiver.
81  */
82 class RequestChannelReceiver {
83     using FmqRequestChannel =
84             hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqRequestDatum,
85                                    hardware::kSynchronizedReadWrite>;
86 
87    public:
88     /**
89      * Create the receiving end of a request channel.
90      *
91      * Prefer this call over the constructor.
92      *
93      * @param requestChannel Descriptor for the request channel.
94      * @param pollingTimeWindow How much time (in microseconds) the
95      *     RequestChannelReceiver is allowed to poll the FMQ before waiting on
96      *     the blocking futex. Polling may result in lower latencies at the
97      *     potential cost of more power usage.
98      * @return RequestChannelReceiver on successful creation, nullptr otherwise.
99      */
100     static std::unique_ptr<RequestChannelReceiver> create(
101             const FmqRequestDescriptor& requestChannel,
102             std::chrono::microseconds pollingTimeWindow);
103 
104     /**
105      * Get the request from the channel.
106      *
107      * This method will block until either:
108      * 1) The packet has been retrieved, or
109      * 2) The receiver has been invalidated
110      *
111      * @return Request object if successfully received, std::nullopt if error or
112      *     if the receiver object was invalidated.
113      */
114     std::optional<std::tuple<hardware::neuralnetworks::V1_0::Request, std::vector<int32_t>,
115                              hardware::neuralnetworks::V1_2::MeasureTiming>>
116     getBlocking();
117 
118     /**
119      * Method to mark the channel as invalid, unblocking any current or future
120      * calls to RequestChannelReceiver::getBlocking.
121      */
122     void invalidate();
123 
124     RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel,
125                            std::chrono::microseconds pollingTimeWindow);
126 
127    private:
128     std::optional<std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum>> getPacketBlocking();
129 
130     const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel;
131     std::atomic<bool> mTeardown{false};
132     const std::chrono::microseconds kPollingTimeWindow;
133 };
134 
135 /**
136  * ResultChannelSender is responsible for serializing the result packet of
137  * information, sending it on the result channel, and signaling that the data is
138  * available.
139  */
140 class ResultChannelSender {
141     using FmqResultChannel = hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqResultDatum,
142                                                     hardware::kSynchronizedReadWrite>;
143 
144    public:
145     /**
146      * Create the sending end of a result channel.
147      *
148      * Prefer this call over the constructor.
149      *
150      * @param resultChannel Descriptor for the result channel.
151      * @return ResultChannelSender on successful creation, nullptr otherwise.
152      */
153     static std::unique_ptr<ResultChannelSender> create(const FmqResultDescriptor& resultChannel);
154 
155     /**
156      * Send the result to the channel.
157      *
158      * @param errorStatus Status of the execution.
159      * @param outputShapes Dynamic shapes of the output tensors.
160      * @param timing Timing information of the execution.
161      * @return 'true' on successful send, 'false' otherwise.
162      */
163     bool send(hardware::neuralnetworks::V1_0::ErrorStatus errorStatus,
164               const std::vector<hardware::neuralnetworks::V1_2::OutputShape>& outputShapes,
165               hardware::neuralnetworks::V1_2::Timing timing);
166 
167     // prefer calling ResultChannelSender::send
168     bool sendPacket(const std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum>& packet);
169 
170     ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel);
171 
172    private:
173     const std::unique_ptr<FmqResultChannel> mFmqResultChannel;
174 };
175 
176 /**
177  * The ExecutionBurstServer class is responsible for waiting for and
178  * deserializing a request object from a FMQ, performing the inference, and
179  * serializing the result back across another FMQ.
180  */
181 class ExecutionBurstServer : public hardware::neuralnetworks::V1_2::IBurstContext {
182     DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstServer);
183 
184    public:
185     /**
186      * IBurstExecutorWithCache is a callback object passed to
187      * ExecutionBurstServer's factory function that is used to perform an
188      * execution. Because some memory resources are needed across multiple
189      * executions, this object also contains a local cache that can directly be
190      * used in the execution.
191      *
192      * ExecutionBurstServer will never access its IBurstExecutorWithCache object
193      * with concurrent calls.
194      */
195     class IBurstExecutorWithCache {
196         DISALLOW_COPY_AND_ASSIGN(IBurstExecutorWithCache);
197 
198        public:
199         IBurstExecutorWithCache() = default;
200         virtual ~IBurstExecutorWithCache() = default;
201 
202         /**
203          * Checks if a cache entry specified by a slot is present in the cache.
204          *
205          * @param slot Identifier of the cache entry.
206          * @return 'true' if the cache entry is present in the cache, 'false'
207          *     otherwise.
208          */
209         virtual bool isCacheEntryPresent(int32_t slot) const = 0;
210 
211         /**
212          * Adds an entry specified by a slot to the cache.
213          *
214          * The caller of this function must ensure that the cache entry that is
215          * being added is not already present in the cache. This can be checked
216          * via isCacheEntryPresent.
217          *
218          * @param memory Memory resource to be cached.
219          * @param slot Slot identifier corresponding to the memory resource.
220          */
221         virtual void addCacheEntry(const hardware::hidl_memory& memory, int32_t slot) = 0;
222 
223         /**
224          * Removes an entry specified by a slot from the cache.
225          *
226          * If the cache entry corresponding to the slot number does not exist,
227          * the call does nothing.
228          *
229          * @param slot Slot identifier corresponding to the memory resource.
230          */
231         virtual void removeCacheEntry(int32_t slot) = 0;
232 
233         /**
234          * Perform an execution.
235          *
236          * @param request Request object with inputs and outputs specified.
237          *     Request::pools is empty, and DataLocation::poolIndex instead
238          *     refers to the 'slots' argument as if it were Request::pools.
239          * @param slots Slots corresponding to the cached memory entries to be
240          *     used.
241          * @param measure Whether timing information is requested for the
242          *     execution.
243          * @return Result of the execution, including the status of the
244          *     execution, dynamic output shapes, and any timing information.
245          */
246         virtual std::tuple<hardware::neuralnetworks::V1_0::ErrorStatus,
247                            hardware::hidl_vec<hardware::neuralnetworks::V1_2::OutputShape>,
248                            hardware::neuralnetworks::V1_2::Timing>
249         execute(const hardware::neuralnetworks::V1_0::Request& request,
250                 const std::vector<int32_t>& slots,
251                 hardware::neuralnetworks::V1_2::MeasureTiming measure) = 0;
252     };
253 
254     /**
255      * Create automated context to manage FMQ-based executions.
256      *
257      * This function is intended to be used by a service to automatically:
258      * 1) Receive data from a provided FMQ
259      * 2) Execute a model with the given information
260      * 3) Send the result to the created FMQ
261      *
262      * @param callback Callback used to retrieve memories corresponding to
263      *     unrecognized slots.
264      * @param requestChannel Input FMQ channel through which the client passes the
265      *     request to the service.
266      * @param resultChannel Output FMQ channel from which the client can retrieve
267      *     the result of the execution.
268      * @param executorWithCache Object which maintains a local cache of the
269      *     memory pools and executes using the cached memory pools.
270      * @param pollingTimeWindow How much time (in microseconds) the
271      *     ExecutionBurstServer is allowed to poll the FMQ before waiting on
272      *     the blocking futex. Polling may result in lower latencies at the
273      *     potential cost of more power usage.
274      * @result IBurstContext Handle to the burst context.
275      */
276     static sp<ExecutionBurstServer> create(
277             const sp<hardware::neuralnetworks::V1_2::IBurstCallback>& callback,
278             const FmqRequestDescriptor& requestChannel, const FmqResultDescriptor& resultChannel,
279             std::shared_ptr<IBurstExecutorWithCache> executorWithCache,
280             std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0});
281 
282     /**
283      * Create automated context to manage FMQ-based executions.
284      *
285      * This function is intended to be used by a service to automatically:
286      * 1) Receive data from a provided FMQ
287      * 2) Execute a model with the given information
288      * 3) Send the result to the created FMQ
289      *
290      * @param callback Callback used to retrieve memories corresponding to
291      *     unrecognized slots.
292      * @param requestChannel Input FMQ channel through which the client passes the
293      *     request to the service.
294      * @param resultChannel Output FMQ channel from which the client can retrieve
295      *     the result of the execution.
296      * @param preparedModel PreparedModel that the burst object was created from.
297      *     IPreparedModel::executeSynchronously will be used to perform the
298      *     execution.
299      * @param pollingTimeWindow How much time (in microseconds) the
300      *     ExecutionBurstServer is allowed to poll the FMQ before waiting on
301      *     the blocking futex. Polling may result in lower latencies at the
302      *     potential cost of more power usage.
303      * @result IBurstContext Handle to the burst context.
304      */
305     static sp<ExecutionBurstServer> create(
306             const sp<hardware::neuralnetworks::V1_2::IBurstCallback>& callback,
307             const FmqRequestDescriptor& requestChannel, const FmqResultDescriptor& resultChannel,
308             hardware::neuralnetworks::V1_2::IPreparedModel* preparedModel,
309             std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0});
310 
311     ExecutionBurstServer(const sp<hardware::neuralnetworks::V1_2::IBurstCallback>& callback,
312                          std::unique_ptr<RequestChannelReceiver> requestChannel,
313                          std::unique_ptr<ResultChannelSender> resultChannel,
314                          std::shared_ptr<IBurstExecutorWithCache> cachedExecutor);
315     ~ExecutionBurstServer();
316 
317     // Used by the NN runtime to preemptively remove any stored memory.
318     hardware::Return<void> freeMemory(int32_t slot) override;
319 
320    private:
321     // Ensures all cache entries contained in mExecutorWithCache are present in
322     // the cache. If they are not present, they are retrieved (via
323     // IBurstCallback::getMemories) and added to mExecutorWithCache.
324     //
325     // This method is locked via mMutex when it is called.
326     void ensureCacheEntriesArePresentLocked(const std::vector<int32_t>& slots);
327 
328     // Work loop that will continue processing execution requests until the
329     // ExecutionBurstServer object is freed.
330     void task();
331 
332     std::thread mWorker;
333     std::mutex mMutex;
334     std::atomic<bool> mTeardown{false};
335     const sp<hardware::neuralnetworks::V1_2::IBurstCallback> mCallback;
336     const std::unique_ptr<RequestChannelReceiver> mRequestChannelReceiver;
337     const std::unique_ptr<ResultChannelSender> mResultChannelSender;
338     const std::shared_ptr<IBurstExecutorWithCache> mExecutorWithCache;
339 };
340 
341 }  // namespace android::nn
342 
343 #endif  // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H
344