1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <gtest/gtest.h>
18 
19 #include <chrono>
20 #include <iterator>
21 #include <map>
22 #include <queue>
23 #include <set>
24 #include <string>
25 #include <thread>
26 #include <tuple>
27 #include <utility>
28 #include <vector>
29 
30 #include "CompilationBuilder.h"
31 #include "ExecutionBurstServer.h"
32 #include "HalInterfaces.h"
33 #include "Manager.h"
34 #include "NeuralNetworks.h"
35 #include "NeuralNetworksOEM.h"
36 #include "SampleDriver.h"
37 #include "TestNeuralNetworksWrapper.h"
38 #include "Utils.h"
39 #include "ValidateHal.h"
40 
41 namespace {
42 
// Pull the NNAPI runtime and HAL names into this anonymous namespace so the
// tests below can refer to them without qualification.
using namespace ::android;
using namespace nn::hal;

// Short aliases for the runtime, sample-driver, and test-wrapper types used
// throughout this file.
using CompilationBuilder = nn::CompilationBuilder;
using Device = nn::Device;
using DeviceManager = nn::DeviceManager;
using ExecutePreference = nn::test_wrapper::ExecutePreference;
using ExecutionBurstServer = nn::ExecutionBurstServer;
using HidlModel = V1_3::Model;
using PreparedModelCallback = nn::PreparedModelCallback;
using Result = nn::test_wrapper::Result;
using SampleDriver = nn::sample_driver::SampleDriver;
using SamplePreparedModel = nn::sample_driver::SamplePreparedModel;
using SampleFencedExecutionCallback = nn::sample_driver::SampleFencedExecutionCallback;
using WrapperModel = nn::test_wrapper::Model;
using WrapperOperandType = nn::test_wrapper::OperandType;
using WrapperType = nn::test_wrapper::Type;
using nn::convertToV1_0;
using nn::convertToV1_3;

// Synchronized fast-message-queue descriptor, as taken by
// configureExecutionBurst() below.
template <typename T>
using MQDescriptorSync = hardware::MQDescriptorSync<T>;

// Timing reported by the test drivers for a failed execution: both fields are
// UINT64_MAX, the same value getExpectedTiming() uses for an unavailable field.
constexpr Timing kBadTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
// Timing values the test drivers report when unfenced timing is available.
constexpr Timing kGoodUnfencedTiming = {.timeOnDevice = 123, .timeInDriver = 456};
// Timing values the test drivers report when fenced timing is available.
// Deliberately different from the unfenced values so the two can be told apart.
constexpr Timing kGoodFencedTiming = {.timeOnDevice = 23, .timeInDriver = 56};
69 
70 // This is an IDevice for testing purposes. The test driver has customized
71 // getCapabilities_1_3 and getSupportedOperations_1_3.
72 class TestDriver : public SampleDriver {
73    public:
TestDriver(const char * name,Capabilities capabilities,const std::vector<bool> & supportedOps)74     TestDriver(const char* name, Capabilities capabilities, const std::vector<bool>& supportedOps)
75         : SampleDriver(name), mCapabilities(capabilities), mSupportedOps(supportedOps) {}
~TestDriver()76     ~TestDriver() override {}
77 
getCapabilities_1_3(getCapabilities_1_3_cb cb)78     Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override {
79         cb(V1_3::ErrorStatus::NONE, mCapabilities);
80         return Void();
81     }
82 
getSupportedOperations_1_3(const Model & model,getSupportedOperations_1_3_cb cb)83     Return<void> getSupportedOperations_1_3(const Model& model,
84                                             getSupportedOperations_1_3_cb cb) override {
85         if (!android::nn::validateModel(model)) {
86             cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>());
87             return Void();
88         }
89         const size_t count = model.main.operations.size();
90         std::vector<bool> supported(count);
91         std::transform(
92                 model.main.operations.begin(), model.main.operations.end(), supported.begin(),
93                 [this](Operation op) { return mSupportedOps[static_cast<int32_t>(op.type)]; });
94         cb(V1_3::ErrorStatus::NONE, supported);
95         return Void();
96     }
97 
98    private:
99     Capabilities mCapabilities;
100     std::vector<bool> mSupportedOps;
101 };
102 
103 class IntrospectionControlTest : public ::testing::Test {
104    protected:
SetUp()105     virtual void SetUp() {}
TearDown()106     virtual void TearDown() {
107         if (mEvent) {
108             ANeuralNetworksEvent_free(mEvent);
109         }
110         if (mExecution) {
111             ANeuralNetworksExecution_free(mExecution);
112         }
113         if (mCompilation) {
114             ANeuralNetworksCompilation_free(mCompilation);
115         }
116         DeviceManager::get()->forTest_reInitializeDeviceList();
117     }
118 
119     struct DeviceSpecification {
DeviceSpecification__anon736024e70111::IntrospectionControlTest::DeviceSpecification120         DeviceSpecification(const std::string& name, float perf, std::vector<bool>& supportedOps)
121             : mName(name), mSupportedOps(supportedOps) {
122             PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
123             mCapabilities = {
124                     .relaxedFloat32toFloat16PerformanceScalar = perfInfo,
125                     .relaxedFloat32toFloat16PerformanceTensor = perfInfo,
126                     .operandPerformance =
127                             nn::nonExtensionOperandPerformance<nn::HalVersion::V1_3>(perfInfo),
128                     .ifPerformance = perfInfo,
129                     .whilePerformance = perfInfo};
130         }
131         std::string mName;
132         Capabilities mCapabilities;
133         std::vector<bool> mSupportedOps;
134     };
135 
136     // From a vector of DeviceSpecification, register new Devices.
registerDevices(std::vector<DeviceSpecification> specifications)137     void registerDevices(std::vector<DeviceSpecification> specifications) {
138         for (const auto& specification : specifications) {
139             DeviceManager::get()->forTest_registerDevice(
140                     specification.mName.c_str(),
141                     new TestDriver(specification.mName.c_str(), specification.mCapabilities,
142                                    specification.mSupportedOps));
143         }
144     }
145 
selectDeviceByName(const std::string & name)146     bool selectDeviceByName(const std::string& name) {
147         uint32_t numDevices = 0;
148         EXPECT_EQ(ANeuralNetworks_getDeviceCount(&numDevices), ANEURALNETWORKS_NO_ERROR);
149         EXPECT_GE(numDevices, (uint32_t)1);
150 
151         for (uint32_t i = 0; i < numDevices; i++) {
152             ANeuralNetworksDevice* device = nullptr;
153             EXPECT_EQ(ANeuralNetworks_getDevice(i, &device), ANEURALNETWORKS_NO_ERROR);
154             const char* buffer = nullptr;
155             int result = ANeuralNetworksDevice_getName(device, &buffer);
156             if (result == ANEURALNETWORKS_NO_ERROR && name.compare(buffer) == 0) {
157                 mDevices.push_back(device);
158                 return true;
159             }
160         }
161         return false;
162     }
163 
isSupportedOpListExpected(const std::vector<bool> & expected)164     bool isSupportedOpListExpected(const std::vector<bool>& expected) {
165         const uint32_t kMaxNumberOperations = 256;
166         EXPECT_LE(expected.size(), kMaxNumberOperations);
167         ANeuralNetworksModel* modelHandle = mModel.getHandle();
168         bool supported[kMaxNumberOperations] = {false};
169         EXPECT_EQ(ANeuralNetworksModel_getSupportedOperationsForDevices(
170                           modelHandle, mDevices.data(), mDevices.size(), supported),
171                   ANEURALNETWORKS_NO_ERROR);
172         return std::equal(expected.begin(), expected.end(), supported);
173     }
174 
prepareForExecution(bool measureTiming=false)175     int prepareForExecution(bool measureTiming = false) {
176         ANeuralNetworksModel* modelHandle = mModel.getHandle();
177         int result = ANeuralNetworksCompilation_createForDevices(modelHandle, mDevices.data(),
178                                                                  mDevices.size(), &mCompilation);
179         if (result != ANEURALNETWORKS_NO_ERROR) {
180             return result;
181         }
182         EXPECT_EQ(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
183         EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &mExecution),
184                   ANEURALNETWORKS_NO_ERROR);
185         if (measureTiming) {
186             // Don't call setMeasureTiming unless we need to -- cannot call this
187             // API unless there is exactly one device.
188             EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
189                       ANEURALNETWORKS_NO_ERROR);
190         }
191         return ANEURALNETWORKS_NO_ERROR;
192     }
193 
194     std::vector<ANeuralNetworksDevice*> mDevices;
195     ANeuralNetworksEvent* mEvent = nullptr;
196     ANeuralNetworksExecution* mExecution = nullptr;
197     ANeuralNetworksCompilation* mCompilation = nullptr;
198     WrapperModel mModel;
199 };
200 
createSimpleAddModel(WrapperModel * model)201 void createSimpleAddModel(WrapperModel* model) {
202     WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
203     WrapperOperandType type1(WrapperType::INT32, {});
204     // Phase 1, operands
205     auto op1 = model->addOperand(&type0);
206     auto op2 = model->addOperand(&type0);
207     auto act = model->addOperand(&type1);
208     auto op3 = model->addOperand(&type0);
209     // Phase 2, operations
210     static int32_t act_init[] = {0};
211     model->setOperandValue(act, act_init, sizeof(act_init));
212     model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
213     // Phase 3, inputs and outputs
214     model->identifyInputsAndOutputs({op1, op2}, {op3});
215     model->finish();
216     ASSERT_TRUE(model->isValid());
217 }
218 
219 // This test verifies that a simple ADD model is able to run on a single device that claims being
220 // able to handle all operations.
TEST_F(IntrospectionControlTest,SimpleAddModel)221 TEST_F(IntrospectionControlTest, SimpleAddModel) {
222     // This is needed before we have the CPU fallback path being treated as a Device.
223     // TODO(miaowang): remove once b/72506261 is fixed.
224     if (DeviceManager::get()->getUseCpuOnly()) {
225         GTEST_SKIP();
226     }
227 
228     createSimpleAddModel(&mModel);
229 
230     std::string driverName = "test-all";
231     std::vector<bool> ops(android::nn::kNumberOfOperationTypes, true);
232     registerDevices({{driverName, 0.9, ops}});
233 
234     EXPECT_TRUE(selectDeviceByName(driverName));
235     EXPECT_TRUE(isSupportedOpListExpected({true}));
236     EXPECT_EQ(prepareForExecution(), ANEURALNETWORKS_NO_ERROR);
237 
238     // Verify that the mCompilation is actually using the "test-all" device.
239     CompilationBuilder* c = reinterpret_cast<CompilationBuilder*>(mCompilation);
240     const std::string& deviceNameBuffer =
241             c->forTest_getExecutionPlan().forTest_simpleGetDevice()->getName();
242     EXPECT_EQ(driverName, deviceNameBuffer);
243 
244     float input1[2] = {1.0f, 2.0f};
245     float input2[2] = {3.0f, 4.0f};
246     float output[2];
247     EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
248               ANEURALNETWORKS_NO_ERROR);
249     EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
250               ANEURALNETWORKS_NO_ERROR);
251     EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
252               ANEURALNETWORKS_NO_ERROR);
253     EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
254               ANEURALNETWORKS_NO_ERROR);
255 
256     EXPECT_EQ(ANeuralNetworksExecution_startCompute(mExecution, &mEvent), ANEURALNETWORKS_NO_ERROR);
257     EXPECT_EQ(ANeuralNetworksEvent_wait(mEvent), ANEURALNETWORKS_NO_ERROR);
258     EXPECT_EQ(output[0], input1[0] + input2[0]);
259     EXPECT_EQ(output[1], input1[1] + input2[1]);
260 
261     uint64_t timeOnHardware, timeInDriver;
262     EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
263                                                    &timeOnHardware),
264               ANEURALNETWORKS_NO_ERROR);
265     EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_IN_DRIVER,
266                                                    &timeInDriver),
267               ANEURALNETWORKS_NO_ERROR);
268     if (timeOnHardware != UINT64_MAX && timeInDriver != UINT64_MAX) {
269         EXPECT_LE(timeOnHardware, timeInDriver);
270     }
271 }
272 
273 /*-- Begin test drivers -------------------------------------------------------------------------*/
274 
275 namespace test_drivers {
276 
// Selects how a test prepared model behaves when asked to execute: whether it
// fails (and when), or passes and which timing fields it reports.
enum class Success : uint32_t {
    // ASYNC: Return ErrorStatus::GENERAL_FAILURE; notify ErrorStatus::GENERAL_FAILURE and
    // kBadTiming
    // SYNC, BURST: Return ErrorStatus::GENERAL_FAILURE and kBadTiming
    // FENCED: Return ErrorStatus::GENERAL_FAILURE, empty hidl_handle, and a nullptr callback
    FAIL_LAUNCH = 0,

    // ASYNC: Return ErrorStatus::NONE; notify ErrorStatus::GENERAL_FAILURE and kBadTiming
    FAIL_WAIT = 1,

    // Bit layout of a PASS value:
    // - one bit that marks the value as PASS rather than FAIL;
    // - one bit per timing field, (Unfenced, Fenced) x (OnDevice, InDriver),
    //   set when that timing is available and clear when it is unavailable;
    // - one bit that calls out the special case of CPU.
    PASS_BIT = 1u << 4,
    PASS_UNFENCED_DEVICE_BIT = 1u << 5,
    PASS_UNFENCED_DRIVER_BIT = 1u << 6,
    PASS_FENCED_DEVICE_BIT = 1u << 7,
    PASS_FENCED_DRIVER_BIT = 1u << 8,
    PASS_CPU_BIT = 1u << 9,

    // Each of the four timing fields may be either unavailable or 0
    PASS_CPU = PASS_BIT | PASS_CPU_BIT,

    // ASYNC: Return ErrorStatus::NONE; notify ErrorStatus::NONE and timing
    // SYNC, BURST: Return ErrorStatus::NONE and timing
    // FENCED: Return ErrorStatus::NONE, empty hidl_handle, and a callback with timing.
    //
    // Every PASS value other than PASS_CPU is named
    // PASS_${UNFENCED_TIME}_${FENCED_TIME}.  For example, PASS_NEITHER_BOTH
    // means that only fenced timing is available (both timeOnDevice and
    // timeInDriver).  A missing _${FENCED_TIME} suffix means _NEITHER, so
    // PASS_BOTH means that only unfenced timing is available (both
    // timeOnDevice and timeInDriver).

    // Unfenced timing only.
    PASS_NEITHER = PASS_BIT,
    PASS_DEVICE = PASS_BIT | PASS_UNFENCED_DEVICE_BIT,
    PASS_DRIVER = PASS_BIT | PASS_UNFENCED_DRIVER_BIT,
    PASS_BOTH = PASS_BIT | PASS_UNFENCED_DEVICE_BIT | PASS_UNFENCED_DRIVER_BIT,

    // Fenced timing only.
    PASS_NEITHER_DEVICE = PASS_BIT | PASS_FENCED_DEVICE_BIT,
    PASS_NEITHER_DRIVER = PASS_BIT | PASS_FENCED_DRIVER_BIT,
    PASS_NEITHER_BOTH = PASS_BIT | PASS_FENCED_DEVICE_BIT | PASS_FENCED_DRIVER_BIT,

    // Unfenced timeOnDevice, plus some fenced timing.
    PASS_DEVICE_DEVICE = PASS_DEVICE | PASS_FENCED_DEVICE_BIT,
    PASS_DEVICE_DRIVER = PASS_DEVICE | PASS_FENCED_DRIVER_BIT,
    PASS_DEVICE_BOTH = PASS_DEVICE | PASS_FENCED_DEVICE_BIT | PASS_FENCED_DRIVER_BIT,

    // Unfenced timeInDriver, plus some fenced timing.
    PASS_DRIVER_DEVICE = PASS_DRIVER | PASS_FENCED_DEVICE_BIT,
    PASS_DRIVER_DRIVER = PASS_DRIVER | PASS_FENCED_DRIVER_BIT,
    PASS_DRIVER_BOTH = PASS_DRIVER | PASS_FENCED_DEVICE_BIT | PASS_FENCED_DRIVER_BIT,

    // Both unfenced timings, plus some fenced timing.
    PASS_BOTH_DEVICE = PASS_BOTH | PASS_FENCED_DEVICE_BIT,
    PASS_BOTH_DRIVER = PASS_BOTH | PASS_FENCED_DRIVER_BIT,
    PASS_BOTH_BOTH = PASS_BOTH | PASS_FENCED_DEVICE_BIT | PASS_FENCED_DRIVER_BIT,
};
329 
hasBit(Success mask,Success bit)330 bool hasBit(Success mask, Success bit) {
331     const uint32_t bitAsInt = static_cast<uint32_t>(bit);
332     CHECK(bitAsInt && (bitAsInt & (bitAsInt - 1)) == 0)
333             << "second argument must be a single bit rather than " << static_cast<uint32_t>(bit);
334     return static_cast<uint32_t>(mask) & bitAsInt;
335 }
336 
clearBit(Success mask,Success bit)337 Success clearBit(Success mask, Success bit) {
338     const uint32_t bitAsInt = static_cast<uint32_t>(bit);
339     CHECK(bitAsInt && (bitAsInt & (bitAsInt - 1)) == 0)
340             << "second argument must be a single bit rather than " << static_cast<uint32_t>(bit);
341     return static_cast<Success>(static_cast<uint32_t>(mask) & ~bitAsInt);
342 }
343 
operator <<(std::ostream & os,Success success)344 std::ostream& operator<<(std::ostream& os, Success success) {
345     switch (success) {
346         case Success::FAIL_LAUNCH:
347             return os << "FAIL_LAUNCH";
348         case Success::FAIL_WAIT:
349             return os << "FAIL_WAIT";
350         case Success::PASS_CPU:
351             return os << "PASS_CPU";
352         default:
353             break;
354     }
355 
356     static const std::vector<std::pair<Success, const char*>> bits = {
357             {Success::PASS_BIT, "PASS"},
358             {Success::PASS_UNFENCED_DEVICE_BIT, "UNFENCED_DEVICE"},
359             {Success::PASS_UNFENCED_DRIVER_BIT, "UNFENCED_DRIVER"},
360             {Success::PASS_FENCED_DEVICE_BIT, "FENCED_DEVICE"},
361             {Success::PASS_FENCED_DRIVER_BIT, "FENCED_DRIVER"},
362     };
363     bool gotOutput = false;
364     for (const auto& b : bits) {
365         if (hasBit(success, b.first)) {
366             if (gotOutput) {
367                 os << '|';
368             } else {
369                 gotOutput = true;
370             }
371             os << b.second;
372             success = clearBit(success, b.first);
373         }
374     }
375     if (uint32_t successAsInt = static_cast<uint32_t>(success)) {
376         if (gotOutput) {
377             os << '|';
378         }
379         os << successAsInt;
380     }
381     return os;
382 }
383 
384 // Returns (unfenced timing, fenced timing).
385 // Not for PASS_CPU.
getExpectedTiming(Success s,bool fencedExecution)386 std::pair<Timing, Timing> getExpectedTiming(Success s, bool fencedExecution) {
387     CHECK_NE(s, Success::PASS_CPU);
388 
389     if (!hasBit(s, Success::PASS_BIT)) {
390         return {kBadTiming, kBadTiming};
391     }
392 
393     std::pair<Timing, Timing> result;
394     result.first.timeOnDevice = hasBit(s, Success::PASS_UNFENCED_DEVICE_BIT)
395                                         ? kGoodUnfencedTiming.timeOnDevice
396                                         : UINT64_MAX;
397     result.first.timeInDriver = hasBit(s, Success::PASS_UNFENCED_DRIVER_BIT)
398                                         ? kGoodUnfencedTiming.timeInDriver
399                                         : UINT64_MAX;
400     if (fencedExecution) {
401         result.second.timeOnDevice = hasBit(s, Success::PASS_FENCED_DEVICE_BIT)
402                                              ? kGoodFencedTiming.timeOnDevice
403                                              : UINT64_MAX;
404         result.second.timeInDriver = hasBit(s, Success::PASS_FENCED_DRIVER_BIT)
405                                              ? kGoodFencedTiming.timeInDriver
406                                              : UINT64_MAX;
407     } else {
408         result.second = result.first;
409     }
410     return result;
411 }
412 
413 // For these tests we don't care about actually running an inference -- we
414 // just want to placeholder up execution status and timing results, and control
415 // when the execution finishes.
416 class TestPreparedModelLatest : public SamplePreparedModel {
417    public:
TestPreparedModelLatest(const HidlModel & model,const SampleDriver * driver,Success success)418     TestPreparedModelLatest(const HidlModel& model, const SampleDriver* driver, Success success)
419         : SamplePreparedModel(model, driver, ExecutionPreference::FAST_SINGLE_ANSWER, uid_t{},
420                               kDefaultPriority),
421           mSuccess(success) {}
422 
execute(const V1_0::Request &,const sp<V1_0::IExecutionCallback> & callback)423     Return<V1_0::ErrorStatus> execute(const V1_0::Request&,
424                                       const sp<V1_0::IExecutionCallback>& callback) override {
425         switch (mSuccess) {
426             case Success::PASS_NEITHER:
427                 std::thread([callback] {
428                     dummyExecution();
429                     callback->notify(V1_0::ErrorStatus::NONE);
430                 }).detach();
431                 return V1_0::ErrorStatus::NONE;
432             case Success::FAIL_LAUNCH:
433                 dummyExecution();
434                 callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
435                 return V1_0::ErrorStatus::GENERAL_FAILURE;
436             case Success::FAIL_WAIT:
437                 std::thread([callback] {
438                     dummyExecution();
439                     callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
440                 }).detach();
441                 return V1_0::ErrorStatus::NONE;
442             default:
443                 ADD_FAILURE() << "Unexpected Success kind";
444                 return V1_0::ErrorStatus::GENERAL_FAILURE;
445         }
446     }
447 
execute_1_2(const V1_0::Request &,MeasureTiming measure,const sp<V1_2::IExecutionCallback> & callback)448     Return<V1_0::ErrorStatus> execute_1_2(const V1_0::Request&, MeasureTiming measure,
449                                           const sp<V1_2::IExecutionCallback>& callback) override {
450         EXPECT_EQ(measure, MeasureTiming::YES);
451         switch (mSuccess) {
452             case Success::PASS_NEITHER:
453             case Success::PASS_DEVICE:
454             case Success::PASS_DRIVER:
455             case Success::PASS_BOTH:
456                 std::thread([this, callback] {
457                     dummyExecution();
458                     callback->notify_1_2(V1_0::ErrorStatus::NONE, {},
459                                          getExpectedTiming(mSuccess, false).first);
460                 }).detach();
461                 return V1_0::ErrorStatus::NONE;
462             case Success::FAIL_LAUNCH:
463                 dummyExecution();
464                 callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
465                 return V1_0::ErrorStatus::GENERAL_FAILURE;
466             case Success::FAIL_WAIT:
467                 std::thread([callback] {
468                     dummyExecution();
469                     callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
470                 }).detach();
471                 return V1_0::ErrorStatus::NONE;
472             default:
473                 ADD_FAILURE() << "Unexpected Success kind";
474                 return V1_0::ErrorStatus::GENERAL_FAILURE;
475         }
476     }
477 
execute_1_3(const V1_3::Request &,MeasureTiming measure,const OptionalTimePoint &,const OptionalTimeoutDuration &,const sp<V1_3::IExecutionCallback> & callback)478     Return<V1_3::ErrorStatus> execute_1_3(const V1_3::Request&, MeasureTiming measure,
479                                           const OptionalTimePoint&, const OptionalTimeoutDuration&,
480                                           const sp<V1_3::IExecutionCallback>& callback) override {
481         // Use a placeholder V1_0::Request because execute_1_2 ignores request entirely.
482         const V1_0::ErrorStatus status = execute_1_2(V1_0::Request{}, measure, callback);
483         return convertToV1_3(status);
484     }
485 
executeSynchronously(const V1_0::Request &,MeasureTiming measure,executeSynchronously_cb cb)486     Return<void> executeSynchronously(const V1_0::Request&, MeasureTiming measure,
487                                       executeSynchronously_cb cb) override {
488         EXPECT_EQ(measure, MeasureTiming::YES);
489         switch (mSuccess) {
490             case Success::PASS_NEITHER:
491             case Success::PASS_DEVICE:
492             case Success::PASS_DRIVER:
493             case Success::PASS_BOTH:
494                 dummyExecution();
495                 cb(V1_0::ErrorStatus::NONE, {}, getExpectedTiming(mSuccess, false).first);
496                 return Void();
497             case Success::FAIL_WAIT:
498                 // While this is a synchronous execution method, the NNAPI
499                 // runtime may call it even for asynchronous execution, so we
500                 // need to tolerate Success::FAIL_WAIT here, not just
501                 // Success::FAIL_LAUNCH.
502                 FALLTHROUGH_INTENDED;
503             case Success::FAIL_LAUNCH:
504                 dummyExecution();
505                 cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kBadTiming);
506                 return Void();
507             default:
508                 ADD_FAILURE() << "Unexpected Success kind";
509                 cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kBadTiming);
510                 return Void();
511         }
512     }
513 
executeSynchronously_1_3(const V1_3::Request &,MeasureTiming measure,const OptionalTimePoint &,const OptionalTimeoutDuration &,executeSynchronously_1_3_cb cb)514     Return<void> executeSynchronously_1_3(const V1_3::Request&, MeasureTiming measure,
515                                           const OptionalTimePoint&, const OptionalTimeoutDuration&,
516                                           executeSynchronously_1_3_cb cb) override {
517         const auto wrappedCb = [&cb](V1_0::ErrorStatus status,
518                                      const hidl_vec<OutputShape>& outputShapes, Timing timing) {
519             cb(convertToV1_3(status), outputShapes, timing);
520         };
521         // Use a placeholder V1_0::Request because executeSynchronously ignores request entirely.
522         return executeSynchronously(V1_0::Request{}, measure, wrappedCb);
523     }
524 
525     // ExecutionBurstServer::create has an overload that will use
526     // IPreparedModel::executeSynchronously(), so we can rely on that, rather
527     // than having to implement ExecutionBurstServer::IExecutorWithCache.
configureExecutionBurst(const sp<V1_2::IBurstCallback> & callback,const MQDescriptorSync<V1_2::FmqRequestDatum> & requestChannel,const MQDescriptorSync<V1_2::FmqResultDatum> & resultChannel,configureExecutionBurst_cb cb)528     Return<void> configureExecutionBurst(
529             const sp<V1_2::IBurstCallback>& callback,
530             const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
531             const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
532             configureExecutionBurst_cb cb) override {
533         const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(
534                 callback, requestChannel, resultChannel, this, std::chrono::microseconds{0});
535 
536         cb(burst == nullptr ? V1_0::ErrorStatus::GENERAL_FAILURE : V1_0::ErrorStatus::NONE, burst);
537         return Void();
538     }
539 
executeFenced(const Request &,const hidl_vec<hidl_handle> &,MeasureTiming measure,const OptionalTimePoint &,const OptionalTimeoutDuration &,const OptionalTimeoutDuration &,executeFenced_cb callback)540     Return<void> executeFenced(const Request&, const hidl_vec<hidl_handle>&, MeasureTiming measure,
541                                const OptionalTimePoint&, const OptionalTimeoutDuration&,
542                                const OptionalTimeoutDuration&, executeFenced_cb callback) override {
543         EXPECT_EQ(measure, MeasureTiming::YES);
544         if (hasBit(mSuccess, Success::PASS_BIT)) {
545             dummyExecution();
546             const auto expectedTiming = getExpectedTiming(mSuccess, true);
547             sp<SampleFencedExecutionCallback> fencedExecutionCallback =
548                     new SampleFencedExecutionCallback(expectedTiming.first, expectedTiming.second,
549                                                       V1_3::ErrorStatus::NONE);
550             callback(V1_3::ErrorStatus::NONE, hidl_handle(nullptr), fencedExecutionCallback);
551             return Void();
552         }
553         switch (mSuccess) {
554             case Success::FAIL_WAIT:
555                 // Due to the limitation of the SampleDriver,
556                 // FAIL_WAIT behaves the same as FAIL_LAUNCH.
557                 // If the SampleDriver is updated to return real
558                 // sync fences, this must be updated.
559                 FALLTHROUGH_INTENDED;
560             case Success::FAIL_LAUNCH:
561                 dummyExecution();
562                 callback(V1_3::ErrorStatus::GENERAL_FAILURE, hidl_handle(nullptr), nullptr);
563                 return Void();
564             default:
565                 ADD_FAILURE() << "Unexpected Success kind";
566                 return Void();
567         }
568     }
569 
570     // We can place the TestPreparedModelLatest system in a "pause" mode where
571     // no execution will complete until the system is taken out of that mode.
572     // Initially, the system is not in that mode.
pauseExecutions(bool v)573     static void pauseExecutions(bool v) { mPauseExecutions.store(v); }
574 
575     // This function is only guaranteed to work in the following pattern:
576     // - pauseExecutions(true);
577     // - // launch execution
578     // - // thread A: waitForExecutionToBegin()
579     // - // thread B: pauseExecutions(false);
waitForExecutionToBegin()580     static void waitForExecutionToBegin() {
581         CHECK(mPauseExecutions.load());
582         while (mExecutionsInFlight.load() == 0) {
583         }
584     }
585 
586    private:
587     Success mSuccess;
588 
589     static std::atomic<bool> mPauseExecutions;
590     static std::atomic<unsigned int> mExecutionsInFlight;
591 
dummyExecution()592     static void dummyExecution() {
593         CHECK_EQ(mExecutionsInFlight.fetch_add(1), 0u) << "We do not support concurrent executions";
594         while (mPauseExecutions.load()) {
595         }
596         mExecutionsInFlight.fetch_sub(1);
597     }
598 };
599 std::atomic<bool> TestPreparedModelLatest::mPauseExecutions = false;
600 std::atomic<unsigned int> TestPreparedModelLatest::mExecutionsInFlight = 0;
601 
602 using TestPreparedModel13 = TestPreparedModelLatest;
603 
604 // Like TestPreparedModelLatest, but implementing 1.2
605 class TestPreparedModel12 : public V1_2::IPreparedModel {
606    public:
TestPreparedModel12(const HidlModel & model,const SampleDriver * driver,Success success)607     TestPreparedModel12(const HidlModel& model, const SampleDriver* driver, Success success)
608         : mLatestPreparedModel(new TestPreparedModelLatest(model, driver, success)) {}
609 
execute(const V1_0::Request & request,const sp<V1_0::IExecutionCallback> & callback)610     Return<V1_0::ErrorStatus> execute(const V1_0::Request& request,
611                                       const sp<V1_0::IExecutionCallback>& callback) override {
612         return mLatestPreparedModel->execute(request, callback);
613     }
614 
execute_1_2(const V1_0::Request & request,MeasureTiming measure,const sp<V1_2::IExecutionCallback> & callback)615     Return<V1_0::ErrorStatus> execute_1_2(const V1_0::Request& request, MeasureTiming measure,
616                                           const sp<V1_2::IExecutionCallback>& callback) override {
617         return mLatestPreparedModel->execute_1_2(request, measure, callback);
618     }
619 
executeSynchronously(const V1_0::Request & request,MeasureTiming measure,executeSynchronously_cb cb)620     Return<void> executeSynchronously(const V1_0::Request& request, MeasureTiming measure,
621                                       executeSynchronously_cb cb) override {
622         return mLatestPreparedModel->executeSynchronously(request, measure, cb);
623     }
624 
configureExecutionBurst(const sp<V1_2::IBurstCallback> & callback,const MQDescriptorSync<V1_2::FmqRequestDatum> & requestChannel,const MQDescriptorSync<V1_2::FmqResultDatum> & resultChannel,configureExecutionBurst_cb cb)625     Return<void> configureExecutionBurst(
626             const sp<V1_2::IBurstCallback>& callback,
627             const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
628             const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
629             configureExecutionBurst_cb cb) override {
630         return mLatestPreparedModel->configureExecutionBurst(callback, requestChannel,
631                                                              resultChannel, cb);
632     }
633 
634    private:
635     const sp<IPreparedModel> mLatestPreparedModel;
636 };
637 
638 // Like TestPreparedModelLatest, but implementing 1.0
639 class TestPreparedModel10 : public V1_0::IPreparedModel {
640    public:
TestPreparedModel10(const HidlModel & model,const SampleDriver * driver,Success success)641     TestPreparedModel10(const HidlModel& model, const SampleDriver* driver, Success success)
642         : mLatestPreparedModel(new TestPreparedModelLatest(model, driver, success)) {}
643 
execute(const V1_0::Request & request,const sp<V1_0::IExecutionCallback> & callback)644     Return<V1_0::ErrorStatus> execute(const V1_0::Request& request,
645                                       const sp<V1_0::IExecutionCallback>& callback) override {
646         return mLatestPreparedModel->execute(request, callback);
647     }
648 
649    private:
650     const sp<IPreparedModel> mLatestPreparedModel;
651 };
652 
653 // Behaves like SampleDriver, except that it produces customized IPrepareModel.
654 class TestDriver13 : public SampleDriver {
655    public:
TestDriver13(const std::string & name,Success success)656     TestDriver13(const std::string& name, Success success)
657         : SampleDriver(name.c_str()), mSuccess(success) {}
658 
getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb)659     Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override {
660         android::nn::initVLogMask();
661         const PerformanceInfo kPerf = {.execTime = 0.75f, .powerUsage = 0.75f};
662         Capabilities capabilities = {
663                 .relaxedFloat32toFloat16PerformanceScalar = kPerf,
664                 .relaxedFloat32toFloat16PerformanceTensor = kPerf,
665                 .operandPerformance =
666                         nn::nonExtensionOperandPerformance<nn::HalVersion::V1_3>(kPerf)};
667         _hidl_cb(V1_3::ErrorStatus::NONE, capabilities);
668         return Void();
669     }
670 
getSupportedOperations_1_3(const HidlModel & model,getSupportedOperations_1_3_cb cb)671     Return<void> getSupportedOperations_1_3(const HidlModel& model,
672                                             getSupportedOperations_1_3_cb cb) override {
673         if (nn::validateModel(model)) {
674             std::vector<bool> supported(model.main.operations.size(), true);
675             cb(V1_3::ErrorStatus::NONE, supported);
676         } else {
677             cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {});
678         }
679         return Void();
680     }
681 
getSupportedOperations_1_2(const V1_2::Model & model,getSupportedOperations_1_2_cb cb)682     Return<void> getSupportedOperations_1_2(const V1_2::Model& model,
683                                             getSupportedOperations_1_2_cb cb) override {
684         if (nn::validateModel(model)) {
685             std::vector<bool> supported(model.operations.size(), true);
686             cb(V1_0::ErrorStatus::NONE, supported);
687         } else {
688             std::vector<bool> supported;
689             cb(V1_0::ErrorStatus::INVALID_ARGUMENT, supported);
690         }
691         return Void();
692     }
693 
prepareModel_1_3(const HidlModel & model,ExecutionPreference,Priority,const OptionalTimePoint &,const hidl_vec<hidl_handle> &,const hidl_vec<hidl_handle> &,const CacheToken &,const sp<V1_3::IPreparedModelCallback> & callback)694     Return<V1_3::ErrorStatus> prepareModel_1_3(
695             const HidlModel& model, ExecutionPreference, Priority, const OptionalTimePoint&,
696             const hidl_vec<hidl_handle>&, const hidl_vec<hidl_handle>&, const CacheToken&,
697             const sp<V1_3::IPreparedModelCallback>& callback) override {
698         callback->notify_1_3(V1_3::ErrorStatus::NONE,
699                              new TestPreparedModel13(model, this, mSuccess));
700         return V1_3::ErrorStatus::NONE;
701     }
702 
prepareModel_1_2(const V1_2::Model & model,ExecutionPreference,const hidl_vec<hidl_handle> &,const hidl_vec<hidl_handle> &,const CacheToken &,const sp<V1_2::IPreparedModelCallback> & callback)703     Return<V1_0::ErrorStatus> prepareModel_1_2(
704             const V1_2::Model& model, ExecutionPreference, const hidl_vec<hidl_handle>&,
705             const hidl_vec<hidl_handle>&, const CacheToken&,
706             const sp<V1_2::IPreparedModelCallback>& callback) override {
707         callback->notify_1_2(V1_0::ErrorStatus::NONE,
708                              new TestPreparedModel12(nn::convertToV1_3(model), this, mSuccess));
709         return V1_0::ErrorStatus::NONE;
710     }
711 
prepareModel_1_1(const V1_1::Model & model,ExecutionPreference,const sp<V1_0::IPreparedModelCallback> & callback)712     Return<V1_0::ErrorStatus> prepareModel_1_1(
713             const V1_1::Model& model, ExecutionPreference,
714             const sp<V1_0::IPreparedModelCallback>& callback) override {
715         callback->notify(V1_0::ErrorStatus::NONE,
716                          new TestPreparedModel10(nn::convertToV1_3(model), this, mSuccess));
717         return V1_0::ErrorStatus::NONE;
718     }
719 
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & callback)720     Return<V1_0::ErrorStatus> prepareModel(
721             const V1_0::Model& model, const sp<V1_0::IPreparedModelCallback>& callback) override {
722         return prepareModel_1_1(nn::convertToV1_1(model), ExecutionPreference::FAST_SINGLE_ANSWER,
723                                 callback);
724     }
725 
726    private:
727     Success mSuccess;
728 };
729 
730 // Like TestDriver, but implementing 1.1
731 class TestDriver11 : public V1_1::IDevice {
732    public:
TestDriver11(const std::string & name,Success success)733     TestDriver11(const std::string& name, Success success)
734         : mLatestDriver(new TestDriver13(name, success)) {}
getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb)735     Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
736         return mLatestDriver->getCapabilities_1_1(_hidl_cb);
737     }
getSupportedOperations_1_1(const V1_1::Model & model,getSupportedOperations_1_1_cb _hidl_cb)738     Return<void> getSupportedOperations_1_1(const V1_1::Model& model,
739                                             getSupportedOperations_1_1_cb _hidl_cb) override {
740         return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
741     }
prepareModel_1_1(const V1_1::Model & model,ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & actualCallback)742     Return<V1_0::ErrorStatus> prepareModel_1_1(
743             const V1_1::Model& model, ExecutionPreference preference,
744             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
745         return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
746     }
getStatus()747     Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
getCapabilities(getCapabilities_cb _hidl_cb)748     Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
749         return mLatestDriver->getCapabilities(_hidl_cb);
750     }
getSupportedOperations(const V1_0::Model & model,getSupportedOperations_cb _hidl_cb)751     Return<void> getSupportedOperations(const V1_0::Model& model,
752                                         getSupportedOperations_cb _hidl_cb) override {
753         return mLatestDriver->getSupportedOperations(model, _hidl_cb);
754     }
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & actualCallback)755     Return<V1_0::ErrorStatus> prepareModel(
756             const V1_0::Model& model,
757             const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
758         return mLatestDriver->prepareModel(model, actualCallback);
759     }
760 
761    private:
762     const sp<V1_3::IDevice> mLatestDriver;
763 };
764 
765 }  // namespace test_drivers
766 
767 /*-- End   test drivers -------------------------------------------------------------------------*/
768 
769 /*-- Begin timing tests -------------------------------------------------------------------------*/
770 
771 namespace timing_tests {
772 
773 using namespace test_drivers;
774 
775 enum class DriverKind {
776     CPU,
777     OLD,  // too old to support timing (1.1 or earlier)
778     NEW   // new enough to support timing (1.2 or later)
779 };
780 
operator <<(std::ostream & os,DriverKind kind)781 std::ostream& operator<<(std::ostream& os, DriverKind kind) {
782     const char* names[] = {"CPU", "OLD", "NEW"};
783     const uint32_t index = static_cast<uint32_t>(kind);
784     CHECK(index < std::size(names));
785     return os << names[index];
786 }
787 
788 enum class Compute { ASYNC, SYNC, BURST, FENCED };
789 
operator <<(std::ostream & os,Compute compute)790 std::ostream& operator<<(std::ostream& os, Compute compute) {
791     const char* names[] = {"ASYNC", "SYNC", "BURST", "FENCED"};
792     const uint32_t index = static_cast<uint32_t>(compute);
793     CHECK(index < std::size(names));
794     return os << names[index];
795 }
796 
797 class TimingTest : public IntrospectionControlTest,
798                    public ::testing::WithParamInterface<std::tuple<DriverKind, Success, Compute>> {
799    public:
TimingTest()800     TimingTest()
801         : kDriverKind(std::get<0>(GetParam())),
802           kSuccess(std::get<1>(GetParam())),
803           kCompute(std::get<2>(GetParam())) {}
804 
805    protected:
806     const DriverKind kDriverKind;
807     const Success kSuccess;
808     const Compute kCompute;
809 };
810 
TEST_P(TimingTest,Test)811 TEST_P(TimingTest, Test) {
812     // There's no straightforward way to force CPU execution to fail.
813     ASSERT_EQ(kDriverKind == DriverKind::CPU, kSuccess == Success::PASS_CPU);
814 
815     // FAIL_WAIT only makes sense for ASYNC and FENCED.
816     ASSERT_TRUE(kCompute == Compute::ASYNC || kCompute == Compute::FENCED ||
817                 kSuccess != Success::FAIL_WAIT);
818 
819     if (DeviceManager::get()->getUseCpuOnly() != (kDriverKind == DriverKind::CPU)) {
820         // We don't have an elegant way to request the CPU driver.  Therefore,
821         // we rely on our test framework to make the choice between CPU and
822         // non-CPU.
823         GTEST_SKIP();
824     }
825 
826     createSimpleAddModel(&mModel);
827 
828     switch (kDriverKind) {
829         case DriverKind::CPU: {
830             // There should be only one driver -- the CPU
831             const std::string& name = DeviceManager::get()->getDrivers()[0]->getName();
832             ASSERT_TRUE(selectDeviceByName(name));
833             break;
834         }
835         case DriverKind::OLD: {
836             static const char name[] = "old";
837             DeviceManager::get()->forTest_registerDevice(name, new TestDriver11(name, kSuccess));
838             ASSERT_TRUE(selectDeviceByName(name));
839             break;
840         }
841         case DriverKind::NEW: {
842             static const char name[] = "new";
843             DeviceManager::get()->forTest_registerDevice(name, new TestDriver13(name, kSuccess));
844             ASSERT_TRUE(selectDeviceByName(name));
845             break;
846         }
847         default:
848             FAIL() << "Unexpected DriverKind";
849     }
850 
851     EXPECT_EQ(prepareForExecution(true /*measureTiming*/), ANEURALNETWORKS_NO_ERROR);
852 
853     float input1[2] = {1.0f, 2.0f};
854     float input2[2] = {3.0f, 4.0f};
855     float output[2];
856     EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
857               ANEURALNETWORKS_NO_ERROR);
858     EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
859               ANEURALNETWORKS_NO_ERROR);
860     EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
861               ANEURALNETWORKS_NO_ERROR);
862     EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
863               ANEURALNETWORKS_NO_ERROR);
864 
865     auto Check = [](bool expectPass, int result) {
866         if (expectPass) {
867             ASSERT_EQ(result, ANEURALNETWORKS_NO_ERROR);
868         } else {
869             ASSERT_NE(result, ANEURALNETWORKS_NO_ERROR);
870         }
871     };
872 
873     const bool isPass = hasBit(kSuccess, Success::PASS_BIT);
874     const int expectedGetDurationResultCode =
875             isPass ? ANEURALNETWORKS_NO_ERROR : ANEURALNETWORKS_BAD_STATE;
876 
877     const auto getDurationWhileRunning = [this] {
878         if (kDriverKind == DriverKind::CPU) {
879             // Testing DriverKind::CPU would require modifying the CPU execution
880             // path to control execution completion, similarly to how this test
881             // case does with TestPreparedModel::dummyExecution(). This does not
882             // seem worthwhile -- it's intrusive into the runtime code solely
883             // for the sake of testing, and we do not expect that the code paths
884             // needed to ensure correct behavior of
885             // ANeuralNetworksExecution_getDuration() on a running execution
886             // would be any different for CPU than for actual drivers.
887             return;
888         }
889         TestPreparedModelLatest::waitForExecutionToBegin();
890         for (int durationCode :
891              std::vector{ANEURALNETWORKS_DURATION_ON_HARDWARE, ANEURALNETWORKS_DURATION_IN_DRIVER,
892                          ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE,
893                          ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER}) {
894             uint64_t time;
895             // Cannot query duration while execution is running
896             EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, durationCode, &time),
897                       ANEURALNETWORKS_BAD_STATE);
898         }
899     };
900 
901     switch (kCompute) {
902         case Compute::ASYNC: {
903             // Ideally what we'd like to do here is
904             //
905             //     Check(kSuccess != Success::FAIL_LAUNCH,
906             //         ANeuralNetworksExecution_startCompute(mExecution, &mEvent));
907             //     Check(isPass, ANeuralNetworksEvent_wait(mEvent));
908             //
909             // However, in the current implementation of the runtime, a launch
910             // failure at the HAL level does not show up as a launch failure at
911             // the NDK level ("startCompute"): The NNAPI runtime does not call a
912             // driver until it (the runtime) begins execution, so a launch
913             // failure at the HAL level looks like an execution failure at the
914             // NDK level ("wait").
915             SCOPED_TRACE("ASYNC startCompute");
916             TestPreparedModelLatest::pauseExecutions(true);
917             Check(true,  // rather than kSuccess != Success::FAIL_LAUNCH
918                   ANeuralNetworksExecution_startCompute(mExecution, &mEvent));
919             getDurationWhileRunning();
920             TestPreparedModelLatest::pauseExecutions(false);
921             SCOPED_TRACE("ASYNC wait");
922             Check(isPass, ANeuralNetworksEvent_wait(mEvent));
923             break;
924         }
925         case Compute::SYNC: {
926             SCOPED_TRACE("SYNC");
927             TestPreparedModelLatest::pauseExecutions(true);
928             std::thread run([this, Check, isPass] {
929                 Check(isPass, ANeuralNetworksExecution_compute(mExecution));
930             });
931             getDurationWhileRunning();
932             TestPreparedModelLatest::pauseExecutions(false);
933             run.join();
934             break;
935         }
936         case Compute::BURST: {
937             SCOPED_TRACE("BURST");
938             ANeuralNetworksBurst* burst;
939             ASSERT_EQ(ANeuralNetworksBurst_create(mCompilation, &burst), ANEURALNETWORKS_NO_ERROR);
940             TestPreparedModelLatest::pauseExecutions(true);
941             std::thread run([this, Check, isPass, burst] {
942                 Check(isPass, ANeuralNetworksExecution_burstCompute(mExecution, burst));
943             });
944             getDurationWhileRunning();
945             TestPreparedModelLatest::pauseExecutions(false);
946             run.join();
947             ANeuralNetworksBurst_free(burst);
948             break;
949         }
950         case Compute::FENCED: {
951             SCOPED_TRACE("FENCED startComputeWithDependencies");
952             TestPreparedModelLatest::pauseExecutions(true);
953 
954             // Note, due to the limitation of SampleDriver implementation, the call is synchronous.
955             // If the SampleDriver is updated to return real sync fence, this must be updated.
956             std::thread run([this, Check, isPass] {
957                 Check(isPass, ANeuralNetworksExecution_startComputeWithDependencies(
958                                       mExecution, nullptr, 0, 0, &mEvent));
959             });
960             getDurationWhileRunning();
961             TestPreparedModelLatest::pauseExecutions(false);
962             run.join();
963             SCOPED_TRACE("FENCED wait");
964             Check(isPass, ANeuralNetworksEvent_wait(mEvent));
965             break;
966         }
967         default:
968             FAIL() << "unreachable";
969     }
970 
971     uint64_t timeOnHardware, timeInDriver, timeOnHardwareFenced, timeInDriverFenced;
972     EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
973                                                    &timeOnHardware),
974               expectedGetDurationResultCode);
975     EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_IN_DRIVER,
976                                                    &timeInDriver),
977               expectedGetDurationResultCode);
978     EXPECT_EQ(
979             ANeuralNetworksExecution_getDuration(
980                     mExecution, ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE, &timeOnHardwareFenced),
981             expectedGetDurationResultCode);
982     EXPECT_EQ(ANeuralNetworksExecution_getDuration(
983                       mExecution, ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER, &timeInDriverFenced),
984               expectedGetDurationResultCode);
985     switch (kDriverKind) {
986         case DriverKind::CPU: {
987             // TODO: Should we require timing to be reported as 0?
988             EXPECT_TRUE(timeOnHardware == 0 || timeOnHardware == UINT64_MAX)
989                     << "timeOnHardware = " << timeOnHardware;
990             EXPECT_TRUE(timeInDriver == 0 || timeInDriver == UINT64_MAX)
991                     << "timeInDriver = " << timeOnHardware;
992             EXPECT_TRUE(timeOnHardwareFenced == 0 || timeOnHardwareFenced == UINT64_MAX)
993                     << "timeOnHardwareFenced = " << timeOnHardwareFenced;
994             EXPECT_TRUE(timeInDriverFenced == 0 || timeInDriverFenced == UINT64_MAX)
995                     << "timeInDriver = " << timeInDriverFenced;
996             break;
997         }
998         case DriverKind::OLD: {
999             EXPECT_EQ(timeOnHardware, UINT64_MAX);
1000             EXPECT_EQ(timeInDriver, UINT64_MAX);
1001             EXPECT_EQ(timeOnHardwareFenced, UINT64_MAX);
1002             EXPECT_EQ(timeInDriverFenced, UINT64_MAX);
1003             break;
1004         }
1005         case DriverKind::NEW: {
1006             auto microsToNanos = [](uint64_t micros) {
1007                 constexpr uint64_t kNanosPerMicro = 1000;
1008                 return micros == UINT64_MAX ? UINT64_MAX : kNanosPerMicro * micros;
1009             };
1010             auto expectedTiming = getExpectedTiming(kSuccess, kCompute == Compute::FENCED);
1011             EXPECT_EQ(timeOnHardware, microsToNanos(expectedTiming.first.timeOnDevice));
1012             EXPECT_EQ(timeInDriver, microsToNanos(expectedTiming.first.timeInDriver));
1013             EXPECT_EQ(timeOnHardwareFenced, microsToNanos(expectedTiming.second.timeOnDevice));
1014             EXPECT_EQ(timeInDriverFenced, microsToNanos(expectedTiming.second.timeInDriver));
1015             break;
1016         }
1017         default:
1018             FAIL() << "unreachable";
1019     }
1020     if (kCompute != Compute::FENCED) {
1021         EXPECT_EQ(timeOnHardware, timeOnHardwareFenced);
1022         EXPECT_EQ(timeInDriver, timeInDriverFenced);
1023     }
1024     auto expectTimingLe = [](uint64_t a, const char* aName, uint64_t b, const char* bName) {
1025         if (a != UINT64_MAX && b != UINT64_MAX) {
1026             EXPECT_LE(a, b) << aName << " exceeds " << bName;
1027         }
1028     };
1029 #define EXPECT_TIMING_LE(a, b) expectTimingLe(a, #a, b, #b)
1030     EXPECT_TIMING_LE(timeOnHardware, timeInDriver);
1031     EXPECT_TIMING_LE(timeOnHardwareFenced, timeInDriverFenced);
1032 
1033     EXPECT_TIMING_LE(timeOnHardwareFenced, timeOnHardware);
1034     EXPECT_TIMING_LE(timeInDriverFenced, timeInDriver);
1035 #undef EXPECT_TIMING_LE
1036 }
1037 
1038 auto kTimingTestUnfencedValues = ::testing::Values(
1039         // NOTE: We cannot force CPU execution to fail
1040         std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::ASYNC),
1041         std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::SYNC),
1042         std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::BURST),
1043 
1044         // NOTE: OLD driver does not provide timing
1045         std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::ASYNC),
1046         std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::SYNC),
1047         std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::BURST),
1048 
1049         std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::ASYNC),
1050         std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::SYNC),
1051         std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::BURST),
1052 
1053         // NOTE: Only ASYNC is paired with a wait
1054         std::make_tuple(DriverKind::OLD, Success::FAIL_WAIT, Compute::ASYNC),
1055 
1056         std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::ASYNC),
1057         std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::SYNC),
1058         std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::BURST),
1059 
1060         std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::ASYNC),
1061         std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::SYNC),
1062         std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::BURST),
1063 
1064         std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::ASYNC),
1065         std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::SYNC),
1066         std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::BURST),
1067 
1068         std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::ASYNC),
1069         std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::SYNC),
1070         std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::BURST),
1071 
1072         std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::ASYNC),
1073         std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::SYNC),
1074         std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::BURST),
1075 
1076         // NOTE: Only ASYNC is paired with a wait
1077         std::make_tuple(DriverKind::NEW, Success::FAIL_WAIT, Compute::ASYNC));
1078 
1079 auto kTimingTestFencedValues = ::testing::Values(
1080         // NOTE: We cannot force CPU execution to fail
1081         std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::FENCED),
1082 
1083         // NOTE: OLD driver does not provide timing
1084         std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::FENCED),
1085 
1086         std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::FENCED),
1087 
1088         std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::FENCED),
1089         std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::FENCED),
1090         std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::FENCED),
1091         std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::FENCED),
1092         std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_DEVICE, Compute::FENCED),
1093         std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_DRIVER, Compute::FENCED),
1094         std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_BOTH, Compute::FENCED),
1095         std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_DEVICE, Compute::FENCED),
1096         std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_DRIVER, Compute::FENCED),
1097         std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_BOTH, Compute::FENCED),
1098         std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_DEVICE, Compute::FENCED),
1099         std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_DRIVER, Compute::FENCED),
1100         std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_BOTH, Compute::FENCED),
1101         std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_DEVICE, Compute::FENCED),
1102         std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_DRIVER, Compute::FENCED),
1103         std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_BOTH, Compute::FENCED),
1104 
1105         std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::FENCED));
1106 
1107 INSTANTIATE_TEST_CASE_P(Unfenced, TimingTest, kTimingTestUnfencedValues);
1108 INSTANTIATE_TEST_CASE_P(Fenced, TimingTest, kTimingTestFencedValues);
1109 
1110 }  // namespace timing_tests
1111 
1112 /*-- End   timing tests -------------------------------------------------------------------------*/
1113 
1114 const float kSimpleCeiling = 2.0f;
1115 
createAddMaxModel(WrapperModel * model,bool reverseOrder)1116 void createAddMaxModel(WrapperModel* model, bool reverseOrder) {
1117     WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
1118     WrapperOperandType type1(WrapperType::INT32, {});
1119     // Phase 1, operands
1120     auto op1 = model->addOperand(&type0);
1121     auto op2 = model->addOperand(&type0);
1122     auto act = model->addOperand(&type1);
1123     auto op3 = model->addOperand(&type0);
1124     auto op4 = model->addOperand(&type0);
1125     auto op5 = model->addOperand(&type0);
1126     // Phase 2, operations
1127     static int32_t act_init[] = {0};
1128     model->setOperandValue(act, act_init, sizeof(act_init));
1129     static float ceiling[] = {kSimpleCeiling, kSimpleCeiling};
1130     model->setOperandValue(op4, ceiling, sizeof(ceiling));
1131     if (reverseOrder) {
1132         // In this case, add MAXIMUM first, but the execution order is still ADD -> MAXIMUM.
1133         model->addOperation(ANEURALNETWORKS_MAXIMUM, {op3, op4}, {op5});
1134         model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1135     } else {
1136         model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1137         model->addOperation(ANEURALNETWORKS_MAXIMUM, {op3, op4}, {op5});
1138     }
1139     // Phase 3, inputs and outputs
1140     model->identifyInputsAndOutputs({op1, op2}, {op5});
1141     model->finish();
1142     ASSERT_TRUE(model->isValid());
1143 }
1144 
// Registers a 1.1 test driver, selects it, and checks the per-operation support it reports
// for the ADD -> MAXIMUM model with the operations added in execution order.
TEST_F(IntrospectionControlTest, SlicingAddMax) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static constexpr char kDriverName[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(
            kDriverName, new TestDriver11(kDriverName, Success::PASS_BOTH));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    createAddMaxModel(&mModel, /*reverseOrder=*/false);

    // Operation 0 is ADD, operation 1 is MAXIMUM: only ADD is expected to be supported.
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));
}
1160 
// Same as SlicingAddMax, but with the operations added in reverse order, to check that the
// reported support list follows the order in which the operations were added.
TEST_F(IntrospectionControlTest, SlicingMaxAdd) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static constexpr char kDriverName[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(
            kDriverName, new TestDriver11(kDriverName, Success::PASS_BOTH));
    ASSERT_TRUE(selectDeviceByName(kDriverName));

    createAddMaxModel(&mModel, /*reverseOrder=*/true);

    // Operation 0 is MAXIMUM, operation 1 is ADD: only ADD is expected to be supported.
    EXPECT_TRUE(isSupportedOpListExpected({false, true}));
}
1176 
1177 const float kSimpleMultiplier = 2.0f;
1178 
createAddMulModel(WrapperModel * model,bool reverseOrder)1179 void createAddMulModel(WrapperModel* model, bool reverseOrder) {
1180     WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
1181     WrapperOperandType type1(WrapperType::INT32, {});
1182     // Phase 1, operands
1183     auto op1 = model->addOperand(&type0);
1184     auto op2 = model->addOperand(&type0);
1185     auto act = model->addOperand(&type1);
1186     auto op3 = model->addOperand(&type0);
1187     auto op4 = model->addOperand(&type0);
1188     auto op5 = model->addOperand(&type0);
1189     // Phase 2, operations
1190     static int32_t act_init[] = {0};
1191     model->setOperandValue(act, act_init, sizeof(act_init));
1192     static float multiplier[] = {kSimpleMultiplier, kSimpleMultiplier};
1193     model->setOperandValue(op4, multiplier, sizeof(multiplier));
1194     if (reverseOrder) {
1195         // In this case, add MUL first, but the execution order is still ADD -> MUL.
1196         model->addOperation(ANEURALNETWORKS_MUL, {op3, op4, act}, {op5});
1197         model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1198     } else {
1199         model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
1200         model->addOperation(ANEURALNETWORKS_MUL, {op3, op4, act}, {op5});
1201     }
1202     // Phase 3, inputs and outputs
1203     model->identifyInputsAndOutputs({op1, op2}, {op5});
1204     model->finish();
1205     ASSERT_TRUE(model->isValid());
1206 }
1207 
1208 // TODO(miaowang): add a test to make sure ANNCompilation_create() has CPU
1209 // fallback.
1210 // This test verifies that a device that could only handle ADD would correctly report that an
1211 // ADD->MUL model could not be fully supported.
TEST_F(IntrospectionControlTest, PartialModelNotSupported) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    // Build the ADD -> MUL model, adding the operations in execution order
    // (ADD is operation 0, MUL is operation 1).
    createAddMulModel(&mModel, false);

    // Describe a single test device whose supported-operation mask has only ADD set.
    std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;

    registerDevices({{addOnlyDriver, 0.9, addOnlyOp}});

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    // Expected per-operation support, indexed in the order the operations were added.
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));

    ANeuralNetworksModel* modelHandle = mModel.getHandle();
    // Creation is a hard precondition for the check below: if it fails, an error from
    // ANeuralNetworksCompilation_finish() would not tell us anything about the missing
    // fallback, so abort the test here instead of continuing.
    ASSERT_EQ(ANeuralNetworksCompilation_createForDevices(modelHandle, mDevices.data(),
                                                          mDevices.size(), &mCompilation),
              ANEURALNETWORKS_NO_ERROR);
    // The compilation must fail as there is no fallback when using
    // Introspection API.
    EXPECT_NE(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
}
1237 
1238 // This test verifies that a device that could only handle ADD would correctly report that an
1239 // ADD->MUL model could not be fully supported. Also verifies that the indices of returned
1240 // supported op list correctly map to the order of operations being added by the user.
TEST_F(IntrospectionControlTest, PartialModelNotSupportedOrder) {
    // Skip in CPU-only mode; required until the CPU fallback path is treated as a Device.
    const bool cpuOnly = DeviceManager::get()->getUseCpuOnly();
    if (cpuOnly) {
        GTEST_SKIP();
    }

    // With the reverse-order flag set, MUL is added first (index 0) and ADD second (index 1).
    constexpr bool kAddMulInReverseOrder = true;
    createAddMulModel(&mModel, kAddMulInReverseOrder);

    // Supported-operation mask: every entry starts out false, then only ADD is switched on.
    std::vector<bool> supportedOps(android::nn::kNumberOfOperationTypes);
    supportedOps[ANEURALNETWORKS_ADD] = true;

    std::string driverName("test-onlyAdd");
    registerDevices({
            {driverName, 0.9, supportedOps},
    });

    EXPECT_TRUE(selectDeviceByName(driverName));

    // The support list is indexed by the order in which the user added the operations,
    // so the MUL slot (0) is unsupported and the ADD slot (1) is supported.
    EXPECT_TRUE(isSupportedOpListExpected({false, true}));
}
1258 
1259 // TODO(miaowang): update the test to make sure the model is actually running on the test devices.
1260 // This test verifies that an ADD->MUL model is able to run on two selected devices that together
1261 // can handle all operations.
TEST_F(IntrospectionControlTest, ModelNeedTwoDevices) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    // Build the ADD -> MUL model, adding the operations in execution order.
    createAddMulModel(&mModel, false);

    // Two test devices with complementary supported-operation masks: one has only ADD set,
    // the other only MUL.
    std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;

    std::string mulOnlyDriver = "test-onlyMul";
    std::vector<bool> mulOnlyOp(android::nn::kNumberOfOperationTypes, false);
    mulOnlyOp[ANEURALNETWORKS_MUL] = true;

    registerDevices({
            {addOnlyDriver, 0.9, addOnlyOp},
            {mulOnlyDriver, 0.9, mulOnlyOp},
    });

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    EXPECT_TRUE(selectDeviceByName(mulOnlyDriver));
    // With both devices selected, every operation of the model is expected to be supported.
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));
    // Everything below uses mExecution, so a failed preparation must end the test.
    ASSERT_EQ(prepareForExecution(), ANEURALNETWORKS_NO_ERROR);

    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    // Zero-initialized so the buffer never holds indeterminate values.
    float output[2] = {};
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);

    // The output is only meaningful after the computation was started and completed
    // successfully; stop before comparing results otherwise.
    ASSERT_EQ(ANeuralNetworksExecution_startCompute(mExecution, &mEvent), ANEURALNETWORKS_NO_ERROR);
    ASSERT_EQ(ANeuralNetworksEvent_wait(mEvent), ANEURALNETWORKS_NO_ERROR);
    // Expected result of ADD followed by MUL: (input1 + input2) * kSimpleMultiplier.
    EXPECT_EQ(output[0], kSimpleMultiplier * (input1[0] + input2[0]));
    EXPECT_EQ(output[1], kSimpleMultiplier * (input1[1] + input2[1]));
}
1303 }  // namespace
1304