/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <gtest/gtest.h>

#include <algorithm>
#include <atomic>
#include <chrono>
#include <iterator>
#include <map>
#include <queue>
#include <set>
#include <string>
#include <thread>
#include <tuple>
#include <utility>
#include <vector>

#include "CompilationBuilder.h"
#include "ExecutionBurstServer.h"
#include "HalInterfaces.h"
#include "Manager.h"
#include "NeuralNetworks.h"
#include "NeuralNetworksOEM.h"
#include "SampleDriver.h"
#include "TestNeuralNetworksWrapper.h"
#include "Utils.h"
#include "ValidateHal.h"

namespace {

using namespace ::android;
using namespace nn::hal;

using CompilationBuilder = nn::CompilationBuilder;
using Device = nn::Device;
using DeviceManager = nn::DeviceManager;
using ExecutePreference = nn::test_wrapper::ExecutePreference;
using ExecutionBurstServer = nn::ExecutionBurstServer;
using HidlModel = V1_3::Model;
using PreparedModelCallback = nn::PreparedModelCallback;
using Result = nn::test_wrapper::Result;
using SampleDriver = nn::sample_driver::SampleDriver;
using SamplePreparedModel = nn::sample_driver::SamplePreparedModel;
using SampleFencedExecutionCallback = nn::sample_driver::SampleFencedExecutionCallback;
using WrapperModel = nn::test_wrapper::Model;
using WrapperOperandType = nn::test_wrapper::OperandType;
using WrapperType = nn::test_wrapper::Type;
using nn::convertToV1_0;
using nn::convertToV1_3;

template <typename T>
using MQDescriptorSync = hardware::MQDescriptorSync<T>;

constexpr Timing kBadTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
constexpr Timing kGoodUnfencedTiming = {.timeOnDevice = 123, .timeInDriver = 456};
constexpr Timing kGoodFencedTiming = {.timeOnDevice = 23, .timeInDriver = 56};
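
// Note (added for clarity): HAL Timing values are reported by drivers in
// microseconds, while ANeuralNetworksExecution_getDuration() reports
// nanoseconds; the TimingTest below converts between the two with
// microsToNanos() before comparing them.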

// This is an IDevice for testing purposes. The test driver has customized
// getCapabilities_1_3 and getSupportedOperations_1_3.
class TestDriver : public SampleDriver {
   public:
    TestDriver(const char* name, Capabilities capabilities, const std::vector<bool>& supportedOps)
        : SampleDriver(name), mCapabilities(capabilities), mSupportedOps(supportedOps) {}
    ~TestDriver() override {}

    Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override {
        cb(V1_3::ErrorStatus::NONE, mCapabilities);
        return Void();
    }

    Return<void> getSupportedOperations_1_3(const Model& model,
                                            getSupportedOperations_1_3_cb cb) override {
        if (!android::nn::validateModel(model)) {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>());
            return Void();
        }
        const size_t count = model.main.operations.size();
        std::vector<bool> supported(count);
        std::transform(
                model.main.operations.begin(), model.main.operations.end(), supported.begin(),
                [this](Operation op) { return mSupportedOps[static_cast<int32_t>(op.type)]; });
        cb(V1_3::ErrorStatus::NONE, supported);
        return Void();
    }

   private:
    Capabilities mCapabilities;
    std::vector<bool> mSupportedOps;
};

class IntrospectionControlTest : public ::testing::Test {
   protected:
    virtual void SetUp() {}
    virtual void TearDown() {
        if (mEvent) {
            ANeuralNetworksEvent_free(mEvent);
        }
        if (mExecution) {
            ANeuralNetworksExecution_free(mExecution);
        }
        if (mCompilation) {
            ANeuralNetworksCompilation_free(mCompilation);
        }
        DeviceManager::get()->forTest_reInitializeDeviceList();
    }

    struct DeviceSpecification {
        DeviceSpecification(const std::string& name, float perf, std::vector<bool>& supportedOps)
            : mName(name), mSupportedOps(supportedOps) {
            PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
            mCapabilities = {
                    .relaxedFloat32toFloat16PerformanceScalar = perfInfo,
                    .relaxedFloat32toFloat16PerformanceTensor = perfInfo,
                    .operandPerformance =
                            nn::nonExtensionOperandPerformance<nn::HalVersion::V1_3>(perfInfo),
                    .ifPerformance = perfInfo,
                    .whilePerformance = perfInfo};
        }
        std::string mName;
        Capabilities mCapabilities;
        std::vector<bool> mSupportedOps;
    };

    // From a vector of DeviceSpecification, register new Devices.
    void registerDevices(std::vector<DeviceSpecification> specifications) {
        for (const auto& specification : specifications) {
            DeviceManager::get()->forTest_registerDevice(
                    specification.mName.c_str(),
                    new TestDriver(specification.mName.c_str(), specification.mCapabilities,
                                   specification.mSupportedOps));
        }
    }

    bool selectDeviceByName(const std::string& name) {
        uint32_t numDevices = 0;
        EXPECT_EQ(ANeuralNetworks_getDeviceCount(&numDevices), ANEURALNETWORKS_NO_ERROR);
        EXPECT_GE(numDevices, (uint32_t)1);

        for (uint32_t i = 0; i < numDevices; i++) {
            ANeuralNetworksDevice* device = nullptr;
            EXPECT_EQ(ANeuralNetworks_getDevice(i, &device), ANEURALNETWORKS_NO_ERROR);
            const char* buffer = nullptr;
            int result = ANeuralNetworksDevice_getName(device, &buffer);
            if (result == ANEURALNETWORKS_NO_ERROR && name.compare(buffer) == 0) {
                mDevices.push_back(device);
                return true;
            }
        }
        return false;
    }

    bool isSupportedOpListExpected(const std::vector<bool>& expected) {
        const uint32_t kMaxNumberOperations = 256;
        EXPECT_LE(expected.size(), kMaxNumberOperations);
        ANeuralNetworksModel* modelHandle = mModel.getHandle();
        bool supported[kMaxNumberOperations] = {false};
        EXPECT_EQ(ANeuralNetworksModel_getSupportedOperationsForDevices(
                          modelHandle, mDevices.data(), mDevices.size(), supported),
                  ANEURALNETWORKS_NO_ERROR);
        return std::equal(expected.begin(), expected.end(), supported);
    }

    int prepareForExecution(bool measureTiming = false) {
        ANeuralNetworksModel* modelHandle = mModel.getHandle();
        int result = ANeuralNetworksCompilation_createForDevices(modelHandle, mDevices.data(),
                                                                 mDevices.size(), &mCompilation);
        if (result != ANEURALNETWORKS_NO_ERROR) {
            return result;
        }
        EXPECT_EQ(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
        EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &mExecution),
                  ANEURALNETWORKS_NO_ERROR);
        if (measureTiming) {
            // Don't call setMeasureTiming unless we need to -- cannot call this
            // API unless there is exactly one device.
            EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
                      ANEURALNETWORKS_NO_ERROR);
        }
        return ANEURALNETWORKS_NO_ERROR;
    }

    std::vector<ANeuralNetworksDevice*> mDevices;
    ANeuralNetworksEvent* mEvent = nullptr;
    ANeuralNetworksExecution* mExecution = nullptr;
    ANeuralNetworksCompilation* mCompilation = nullptr;
    WrapperModel mModel;
};

void createSimpleAddModel(WrapperModel* model) {
    WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
    WrapperOperandType type1(WrapperType::INT32, {});
    // Phase 1, operands
    auto op1 = model->addOperand(&type0);
    auto op2 = model->addOperand(&type0);
    auto act = model->addOperand(&type1);
    auto op3 = model->addOperand(&type0);
    // Phase 2, operations
    static int32_t act_init[] = {0};
    model->setOperandValue(act, act_init, sizeof(act_init));
    model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
    // Phase 3, inputs and outputs
    model->identifyInputsAndOutputs({op1, op2}, {op3});
    model->finish();
    ASSERT_TRUE(model->isValid());
}
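
// (Added note) The model built above computes, elementwise, op3 = op1 + op2
// with a fused activation of 0 (no activation); the SimpleAddModel test below
// relies on this when checking its outputs.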

// This test verifies that a simple ADD model is able to run on a single device that claims
// to be able to handle all operations.
TEST_F(IntrospectionControlTest, SimpleAddModel) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    // TODO(miaowang): remove once b/72506261 is fixed.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createSimpleAddModel(&mModel);

    std::string driverName = "test-all";
    std::vector<bool> ops(android::nn::kNumberOfOperationTypes, true);
    registerDevices({{driverName, 0.9, ops}});

    EXPECT_TRUE(selectDeviceByName(driverName));
    EXPECT_TRUE(isSupportedOpListExpected({true}));
    EXPECT_EQ(prepareForExecution(), ANEURALNETWORKS_NO_ERROR);

    // Verify that the mCompilation is actually using the "test-all" device.
    CompilationBuilder* c = reinterpret_cast<CompilationBuilder*>(mCompilation);
    const std::string& deviceNameBuffer =
            c->forTest_getExecutionPlan().forTest_simpleGetDevice()->getName();
    EXPECT_EQ(driverName, deviceNameBuffer);

    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
              ANEURALNETWORKS_NO_ERROR);

    EXPECT_EQ(ANeuralNetworksExecution_startCompute(mExecution, &mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksEvent_wait(mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(output[0], input1[0] + input2[0]);
    EXPECT_EQ(output[1], input1[1] + input2[1]);

    uint64_t timeOnHardware, timeInDriver;
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
                                                   &timeOnHardware),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_IN_DRIVER,
                                                   &timeInDriver),
              ANEURALNETWORKS_NO_ERROR);
    if (timeOnHardware != UINT64_MAX && timeInDriver != UINT64_MAX) {
        EXPECT_LE(timeOnHardware, timeInDriver);
    }
}

/*-- Begin test drivers -------------------------------------------------------------------------*/

namespace test_drivers {

enum class Success : uint32_t {
    // ASYNC: Return ErrorStatus::GENERAL_FAILURE; notify ErrorStatus::GENERAL_FAILURE and
    //        kBadTiming
    // SYNC, BURST: Return ErrorStatus::GENERAL_FAILURE and kBadTiming
    // FENCED: Return ErrorStatus::GENERAL_FAILURE, empty hidl_handle, and a nullptr callback
    FAIL_LAUNCH,

    // ASYNC: Return ErrorStatus::NONE; notify ErrorStatus::GENERAL_FAILURE and kBadTiming
    FAIL_WAIT,

    // Bit representation for PASS: One bit set to indicate PASS rather than
    // FAIL, one bit for each of the four timing fields (Unfenced, Fenced) x
    // (OnDevice, InDriver) to distinguish between unavailable timing (bit is
    // clear) and available timing (bit is set), and one bit to call out the
    // special case of CPU.
    PASS_BIT = 1 << 4,
    PASS_UNFENCED_DEVICE_BIT = 1 << 5,
    PASS_UNFENCED_DRIVER_BIT = 1 << 6,
    PASS_FENCED_DEVICE_BIT = 1 << 7,
    PASS_FENCED_DRIVER_BIT = 1 << 8,
    PASS_CPU_BIT = 1 << 9,

    // Each of the four timing fields may be either unavailable or 0
    PASS_CPU = PASS_BIT | PASS_CPU_BIT,

    // ASYNC: Return ErrorStatus::NONE; notify ErrorStatus::NONE and timing
    // SYNC, BURST: Return ErrorStatus::NONE and timing
    // FENCED: Return ErrorStatus::NONE, empty hidl_handle, and a callback with timing.
    //
    // For each PASS other than PASS_CPU, an enum name has the form
    // PASS_${UNFENCED_TIME}_${FENCED_TIME}. For example, PASS_NEITHER_BOTH
    // means that only fenced timing is available (both timeOnDevice and
    // timeInDriver). If _${FENCED_TIME} is omitted, it is equivalent to
    // _NEITHER; so PASS_BOTH means that only unfenced timing is available (both
    // timeOnDevice and timeInDriver).
    PASS_NEITHER = PASS_BIT,
    PASS_DEVICE = PASS_BIT | PASS_UNFENCED_DEVICE_BIT,
    PASS_DRIVER = PASS_BIT | PASS_UNFENCED_DRIVER_BIT,
    PASS_BOTH = PASS_BIT | PASS_UNFENCED_DEVICE_BIT | PASS_UNFENCED_DRIVER_BIT,
    PASS_NEITHER_DEVICE = PASS_BIT | PASS_FENCED_DEVICE_BIT,
    PASS_NEITHER_DRIVER = PASS_BIT | PASS_FENCED_DRIVER_BIT,
    PASS_NEITHER_BOTH = PASS_BIT | PASS_FENCED_DEVICE_BIT | PASS_FENCED_DRIVER_BIT,
    PASS_DEVICE_DEVICE = PASS_DEVICE | PASS_NEITHER_DEVICE,
    PASS_DEVICE_DRIVER = PASS_DEVICE | PASS_NEITHER_DRIVER,
    PASS_DEVICE_BOTH = PASS_DEVICE | PASS_NEITHER_BOTH,
    PASS_DRIVER_DEVICE = PASS_DRIVER | PASS_NEITHER_DEVICE,
    PASS_DRIVER_DRIVER = PASS_DRIVER | PASS_NEITHER_DRIVER,
    PASS_DRIVER_BOTH = PASS_DRIVER | PASS_NEITHER_BOTH,
    PASS_BOTH_DEVICE = PASS_BOTH | PASS_NEITHER_DEVICE,
    PASS_BOTH_DRIVER = PASS_BOTH | PASS_NEITHER_DRIVER,
    PASS_BOTH_BOTH = PASS_BOTH | PASS_NEITHER_BOTH,
};
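
// (Added sanity check, illustration only) With the encoding described above,
// PASS_DEVICE_BOTH must decode to PASS plus the unfenced-device, fenced-device,
// and fenced-driver timing bits:
static_assert(static_cast<uint32_t>(Success::PASS_DEVICE_BOTH) ==
                      ((1u << 4) | (1u << 5) | (1u << 7) | (1u << 8)),
              "PASS_* bit encoding no longer matches the comment above");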

bool hasBit(Success mask, Success bit) {
    const uint32_t bitAsInt = static_cast<uint32_t>(bit);
    CHECK(bitAsInt && (bitAsInt & (bitAsInt - 1)) == 0)
            << "second argument must be a single bit rather than " << static_cast<uint32_t>(bit);
    return static_cast<uint32_t>(mask) & bitAsInt;
}

Success clearBit(Success mask, Success bit) {
    const uint32_t bitAsInt = static_cast<uint32_t>(bit);
    CHECK(bitAsInt && (bitAsInt & (bitAsInt - 1)) == 0)
            << "second argument must be a single bit rather than " << static_cast<uint32_t>(bit);
    return static_cast<Success>(static_cast<uint32_t>(mask) & ~bitAsInt);
}
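
// (Added note) In both helpers, (x & (x - 1)) == 0 is the standard test that x
// has at most one bit set; combined with the truthiness check, it requires
// exactly one bit. For example, hasBit(Success::PASS_DEVICE, Success::PASS_BIT)
// is true, and clearBit(Success::PASS_DEVICE, Success::PASS_BIT) leaves only
// PASS_UNFENCED_DEVICE_BIT set -- operator<< below relies on this to print a
// '|'-separated list of bit names.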

std::ostream& operator<<(std::ostream& os, Success success) {
    switch (success) {
        case Success::FAIL_LAUNCH:
            return os << "FAIL_LAUNCH";
        case Success::FAIL_WAIT:
            return os << "FAIL_WAIT";
        case Success::PASS_CPU:
            return os << "PASS_CPU";
        default:
            break;
    }

    static const std::vector<std::pair<Success, const char*>> bits = {
            {Success::PASS_BIT, "PASS"},
            {Success::PASS_UNFENCED_DEVICE_BIT, "UNFENCED_DEVICE"},
            {Success::PASS_UNFENCED_DRIVER_BIT, "UNFENCED_DRIVER"},
            {Success::PASS_FENCED_DEVICE_BIT, "FENCED_DEVICE"},
            {Success::PASS_FENCED_DRIVER_BIT, "FENCED_DRIVER"},
    };
    bool gotOutput = false;
    for (const auto& b : bits) {
        if (hasBit(success, b.first)) {
            if (gotOutput) {
                os << '|';
            } else {
                gotOutput = true;
            }
            os << b.second;
            success = clearBit(success, b.first);
        }
    }
    if (uint32_t successAsInt = static_cast<uint32_t>(success)) {
        if (gotOutput) {
            os << '|';
        }
        os << successAsInt;
    }
    return os;
}

// Returns (unfenced timing, fenced timing).
// Not for PASS_CPU.
std::pair<Timing, Timing> getExpectedTiming(Success s, bool fencedExecution) {
    CHECK_NE(s, Success::PASS_CPU);

    if (!hasBit(s, Success::PASS_BIT)) {
        return {kBadTiming, kBadTiming};
    }

    std::pair<Timing, Timing> result;
    result.first.timeOnDevice = hasBit(s, Success::PASS_UNFENCED_DEVICE_BIT)
                                        ? kGoodUnfencedTiming.timeOnDevice
                                        : UINT64_MAX;
    result.first.timeInDriver = hasBit(s, Success::PASS_UNFENCED_DRIVER_BIT)
                                        ? kGoodUnfencedTiming.timeInDriver
                                        : UINT64_MAX;
    if (fencedExecution) {
        result.second.timeOnDevice = hasBit(s, Success::PASS_FENCED_DEVICE_BIT)
                                             ? kGoodFencedTiming.timeOnDevice
                                             : UINT64_MAX;
        result.second.timeInDriver = hasBit(s, Success::PASS_FENCED_DRIVER_BIT)
                                             ? kGoodFencedTiming.timeInDriver
                                             : UINT64_MAX;
    } else {
        result.second = result.first;
    }
    return result;
}
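
// (Added worked example) getExpectedTiming(Success::PASS_DEVICE_BOTH,
// /*fencedExecution=*/true) returns {{123, UINT64_MAX}, {23, 56}}: unfenced
// timing reports only timeOnDevice (from kGoodUnfencedTiming), while fenced
// timing reports both fields (from kGoodFencedTiming).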

// For these tests we don't care about actually running an inference -- we
// just want to mock up execution status and timing results, and control
// when the execution finishes.
class TestPreparedModelLatest : public SamplePreparedModel {
   public:
    TestPreparedModelLatest(const HidlModel& model, const SampleDriver* driver, Success success)
        : SamplePreparedModel(model, driver, ExecutionPreference::FAST_SINGLE_ANSWER, uid_t{},
                              kDefaultPriority),
          mSuccess(success) {}

    Return<V1_0::ErrorStatus> execute(const V1_0::Request&,
                                      const sp<V1_0::IExecutionCallback>& callback) override {
        switch (mSuccess) {
            case Success::PASS_NEITHER:
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::NONE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            case Success::FAIL_WAIT:
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return V1_0::ErrorStatus::GENERAL_FAILURE;
        }
    }

    Return<V1_0::ErrorStatus> execute_1_2(const V1_0::Request&, MeasureTiming measure,
                                          const sp<V1_2::IExecutionCallback>& callback) override {
        EXPECT_EQ(measure, MeasureTiming::YES);
        switch (mSuccess) {
            case Success::PASS_NEITHER:
            case Success::PASS_DEVICE:
            case Success::PASS_DRIVER:
            case Success::PASS_BOTH:
                std::thread([this, callback] {
                    dummyExecution();
                    callback->notify_1_2(V1_0::ErrorStatus::NONE, {},
                                         getExpectedTiming(mSuccess, false).first);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            case Success::FAIL_WAIT:
                std::thread([callback] {
                    dummyExecution();
                    callback->notify(V1_0::ErrorStatus::GENERAL_FAILURE);
                }).detach();
                return V1_0::ErrorStatus::NONE;
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return V1_0::ErrorStatus::GENERAL_FAILURE;
        }
    }

    Return<V1_3::ErrorStatus> execute_1_3(const V1_3::Request&, MeasureTiming measure,
                                          const OptionalTimePoint&, const OptionalTimeoutDuration&,
                                          const sp<V1_3::IExecutionCallback>& callback) override {
        // Use a placeholder V1_0::Request because execute_1_2 ignores the request entirely.
        const V1_0::ErrorStatus status = execute_1_2(V1_0::Request{}, measure, callback);
        return convertToV1_3(status);
    }

    Return<void> executeSynchronously(const V1_0::Request&, MeasureTiming measure,
                                      executeSynchronously_cb cb) override {
        EXPECT_EQ(measure, MeasureTiming::YES);
        switch (mSuccess) {
            case Success::PASS_NEITHER:
            case Success::PASS_DEVICE:
            case Success::PASS_DRIVER:
            case Success::PASS_BOTH:
                dummyExecution();
                cb(V1_0::ErrorStatus::NONE, {}, getExpectedTiming(mSuccess, false).first);
                return Void();
            case Success::FAIL_WAIT:
                // While this is a synchronous execution method, the NNAPI
                // runtime may call it even for asynchronous execution, so we
                // need to tolerate Success::FAIL_WAIT here, not just
                // Success::FAIL_LAUNCH.
                FALLTHROUGH_INTENDED;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kBadTiming);
                return Void();
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kBadTiming);
                return Void();
        }
    }

    Return<void> executeSynchronously_1_3(const V1_3::Request&, MeasureTiming measure,
                                          const OptionalTimePoint&, const OptionalTimeoutDuration&,
                                          executeSynchronously_1_3_cb cb) override {
        const auto wrappedCb = [&cb](V1_0::ErrorStatus status,
                                     const hidl_vec<OutputShape>& outputShapes, Timing timing) {
            cb(convertToV1_3(status), outputShapes, timing);
        };
        // Use a placeholder V1_0::Request because executeSynchronously ignores the request
        // entirely.
        return executeSynchronously(V1_0::Request{}, measure, wrappedCb);
    }

    // ExecutionBurstServer::create has an overload that will use
    // IPreparedModel::executeSynchronously(), so we can rely on that, rather
    // than having to implement ExecutionBurstServer::IExecutorWithCache.
    Return<void> configureExecutionBurst(
            const sp<V1_2::IBurstCallback>& callback,
            const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
            const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
            configureExecutionBurst_cb cb) override {
        const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(
                callback, requestChannel, resultChannel, this, std::chrono::microseconds{0});

        cb(burst == nullptr ? V1_0::ErrorStatus::GENERAL_FAILURE : V1_0::ErrorStatus::NONE, burst);
        return Void();
    }

    Return<void> executeFenced(const Request&, const hidl_vec<hidl_handle>&, MeasureTiming measure,
                               const OptionalTimePoint&, const OptionalTimeoutDuration&,
                               const OptionalTimeoutDuration&, executeFenced_cb callback) override {
        EXPECT_EQ(measure, MeasureTiming::YES);
        if (hasBit(mSuccess, Success::PASS_BIT)) {
            dummyExecution();
            const auto expectedTiming = getExpectedTiming(mSuccess, true);
            sp<SampleFencedExecutionCallback> fencedExecutionCallback =
                    new SampleFencedExecutionCallback(expectedTiming.first, expectedTiming.second,
                                                      V1_3::ErrorStatus::NONE);
            callback(V1_3::ErrorStatus::NONE, hidl_handle(nullptr), fencedExecutionCallback);
            return Void();
        }
        switch (mSuccess) {
            case Success::FAIL_WAIT:
                // Due to the limitation of the SampleDriver,
                // FAIL_WAIT behaves the same as FAIL_LAUNCH.
                // If the SampleDriver is updated to return real
                // sync fences, this must be updated.
                FALLTHROUGH_INTENDED;
            case Success::FAIL_LAUNCH:
                dummyExecution();
                callback(V1_3::ErrorStatus::GENERAL_FAILURE, hidl_handle(nullptr), nullptr);
                return Void();
            default:
                ADD_FAILURE() << "Unexpected Success kind";
                return Void();
        }
    }

    // We can place the TestPreparedModelLatest system in a "pause" mode where
    // no execution will complete until the system is taken out of that mode.
    // Initially, the system is not in that mode.
    static void pauseExecutions(bool v) { mPauseExecutions.store(v); }

    // This function is only guaranteed to work in the following pattern:
    // - pauseExecutions(true);
    // - // launch execution
    // - // thread A: waitForExecutionToBegin()
    // - // thread B: pauseExecutions(false);
    static void waitForExecutionToBegin() {
        CHECK(mPauseExecutions.load());
        while (mExecutionsInFlight.load() == 0) {
        }
    }

   private:
    Success mSuccess;

    static std::atomic<bool> mPauseExecutions;
    static std::atomic<unsigned int> mExecutionsInFlight;

    static void dummyExecution() {
        CHECK_EQ(mExecutionsInFlight.fetch_add(1), 0u) << "We do not support concurrent executions";
        while (mPauseExecutions.load()) {
        }
        mExecutionsInFlight.fetch_sub(1);
    }
};
std::atomic<bool> TestPreparedModelLatest::mPauseExecutions = false;
std::atomic<unsigned int> TestPreparedModelLatest::mExecutionsInFlight = 0;

using TestPreparedModel13 = TestPreparedModelLatest;

// Like TestPreparedModelLatest, but implementing 1.2
class TestPreparedModel12 : public V1_2::IPreparedModel {
   public:
    TestPreparedModel12(const HidlModel& model, const SampleDriver* driver, Success success)
        : mLatestPreparedModel(new TestPreparedModelLatest(model, driver, success)) {}

    Return<V1_0::ErrorStatus> execute(const V1_0::Request& request,
                                      const sp<V1_0::IExecutionCallback>& callback) override {
        return mLatestPreparedModel->execute(request, callback);
    }

    Return<V1_0::ErrorStatus> execute_1_2(const V1_0::Request& request, MeasureTiming measure,
                                          const sp<V1_2::IExecutionCallback>& callback) override {
        return mLatestPreparedModel->execute_1_2(request, measure, callback);
    }

    Return<void> executeSynchronously(const V1_0::Request& request, MeasureTiming measure,
                                      executeSynchronously_cb cb) override {
        return mLatestPreparedModel->executeSynchronously(request, measure, cb);
    }

    Return<void> configureExecutionBurst(
            const sp<V1_2::IBurstCallback>& callback,
            const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
            const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
            configureExecutionBurst_cb cb) override {
        return mLatestPreparedModel->configureExecutionBurst(callback, requestChannel,
                                                             resultChannel, cb);
    }

   private:
    const sp<IPreparedModel> mLatestPreparedModel;
};

// Like TestPreparedModelLatest, but implementing 1.0
class TestPreparedModel10 : public V1_0::IPreparedModel {
   public:
    TestPreparedModel10(const HidlModel& model, const SampleDriver* driver, Success success)
        : mLatestPreparedModel(new TestPreparedModelLatest(model, driver, success)) {}

    Return<V1_0::ErrorStatus> execute(const V1_0::Request& request,
                                      const sp<V1_0::IExecutionCallback>& callback) override {
        return mLatestPreparedModel->execute(request, callback);
    }

   private:
    const sp<IPreparedModel> mLatestPreparedModel;
};

// Behaves like SampleDriver, except that it produces a customized IPreparedModel.
class TestDriver13 : public SampleDriver {
   public:
    TestDriver13(const std::string& name, Success success)
        : SampleDriver(name.c_str()), mSuccess(success) {}

    Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override {
        android::nn::initVLogMask();
        const PerformanceInfo kPerf = {.execTime = 0.75f, .powerUsage = 0.75f};
        Capabilities capabilities = {
                .relaxedFloat32toFloat16PerformanceScalar = kPerf,
                .relaxedFloat32toFloat16PerformanceTensor = kPerf,
                .operandPerformance =
                        nn::nonExtensionOperandPerformance<nn::HalVersion::V1_3>(kPerf)};
        _hidl_cb(V1_3::ErrorStatus::NONE, capabilities);
        return Void();
    }

    Return<void> getSupportedOperations_1_3(const HidlModel& model,
                                            getSupportedOperations_1_3_cb cb) override {
        if (nn::validateModel(model)) {
            std::vector<bool> supported(model.main.operations.size(), true);
            cb(V1_3::ErrorStatus::NONE, supported);
        } else {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {});
        }
        return Void();
    }

    Return<void> getSupportedOperations_1_2(const V1_2::Model& model,
                                            getSupportedOperations_1_2_cb cb) override {
        if (nn::validateModel(model)) {
            std::vector<bool> supported(model.operations.size(), true);
            cb(V1_0::ErrorStatus::NONE, supported);
        } else {
            std::vector<bool> supported;
            cb(V1_0::ErrorStatus::INVALID_ARGUMENT, supported);
        }
        return Void();
    }

    Return<V1_3::ErrorStatus> prepareModel_1_3(
            const HidlModel& model, ExecutionPreference, Priority, const OptionalTimePoint&,
            const hidl_vec<hidl_handle>&, const hidl_vec<hidl_handle>&, const CacheToken&,
            const sp<V1_3::IPreparedModelCallback>& callback) override {
        callback->notify_1_3(V1_3::ErrorStatus::NONE,
                             new TestPreparedModel13(model, this, mSuccess));
        return V1_3::ErrorStatus::NONE;
    }

    Return<V1_0::ErrorStatus> prepareModel_1_2(
            const V1_2::Model& model, ExecutionPreference, const hidl_vec<hidl_handle>&,
            const hidl_vec<hidl_handle>&, const CacheToken&,
            const sp<V1_2::IPreparedModelCallback>& callback) override {
        callback->notify_1_2(V1_0::ErrorStatus::NONE,
                             new TestPreparedModel12(nn::convertToV1_3(model), this, mSuccess));
        return V1_0::ErrorStatus::NONE;
    }

    Return<V1_0::ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, ExecutionPreference,
            const sp<V1_0::IPreparedModelCallback>& callback) override {
        callback->notify(V1_0::ErrorStatus::NONE,
                         new TestPreparedModel10(nn::convertToV1_3(model), this, mSuccess));
        return V1_0::ErrorStatus::NONE;
    }

    Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model, const sp<V1_0::IPreparedModelCallback>& callback) override {
        return prepareModel_1_1(nn::convertToV1_1(model), ExecutionPreference::FAST_SINGLE_ANSWER,
                                callback);
    }

   private:
    Success mSuccess;
};

// Like TestDriver, but implementing 1.1
class TestDriver11 : public V1_1::IDevice {
   public:
    TestDriver11(const std::string& name, Success success)
        : mLatestDriver(new TestDriver13(name, success)) {}
    Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities_1_1(_hidl_cb);
    }
    Return<void> getSupportedOperations_1_1(const V1_1::Model& model,
                                            getSupportedOperations_1_1_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb);
    }
    Return<V1_0::ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, ExecutionPreference preference,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel_1_1(model, preference, actualCallback);
    }
    Return<DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); }
    Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override {
        return mLatestDriver->getCapabilities(_hidl_cb);
    }
    Return<void> getSupportedOperations(const V1_0::Model& model,
                                        getSupportedOperations_cb _hidl_cb) override {
        return mLatestDriver->getSupportedOperations(model, _hidl_cb);
    }
    Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model,
            const sp<V1_0::IPreparedModelCallback>& actualCallback) override {
        return mLatestDriver->prepareModel(model, actualCallback);
    }

   private:
    const sp<V1_3::IDevice> mLatestDriver;
};

}  // namespace test_drivers

/*-- End test drivers -------------------------------------------------------------------------*/

/*-- Begin timing tests -------------------------------------------------------------------------*/

namespace timing_tests {

using namespace test_drivers;

enum class DriverKind {
    CPU,
    OLD,  // too old to support timing (1.1 or earlier)
    NEW   // new enough to support timing (1.2 or later)
};

std::ostream& operator<<(std::ostream& os, DriverKind kind) {
    const char* names[] = {"CPU", "OLD", "NEW"};
    const uint32_t index = static_cast<uint32_t>(kind);
    CHECK(index < std::size(names));
    return os << names[index];
}

enum class Compute { ASYNC, SYNC, BURST, FENCED };

std::ostream& operator<<(std::ostream& os, Compute compute) {
    const char* names[] = {"ASYNC", "SYNC", "BURST", "FENCED"};
    const uint32_t index = static_cast<uint32_t>(compute);
    CHECK(index < std::size(names));
    return os << names[index];
}

class TimingTest : public IntrospectionControlTest,
                   public ::testing::WithParamInterface<std::tuple<DriverKind, Success, Compute>> {
   public:
    TimingTest()
        : kDriverKind(std::get<0>(GetParam())),
          kSuccess(std::get<1>(GetParam())),
          kCompute(std::get<2>(GetParam())) {}

   protected:
    const DriverKind kDriverKind;
    const Success kSuccess;
    const Compute kCompute;
};
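
// (Added note) Each TimingTest instance is parameterized by a
// (DriverKind, Success, Compute) tuple; the admissible combinations are
// enumerated in kTimingTestUnfencedValues and kTimingTestFencedValues at the
// end of this namespace.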

TEST_P(TimingTest, Test) {
    // There's no straightforward way to force CPU execution to fail.
    ASSERT_EQ(kDriverKind == DriverKind::CPU, kSuccess == Success::PASS_CPU);

    // FAIL_WAIT only makes sense for ASYNC and FENCED.
    ASSERT_TRUE(kCompute == Compute::ASYNC || kCompute == Compute::FENCED ||
                kSuccess != Success::FAIL_WAIT);

    if (DeviceManager::get()->getUseCpuOnly() != (kDriverKind == DriverKind::CPU)) {
        // We don't have an elegant way to request the CPU driver. Therefore,
        // we rely on our test framework to make the choice between CPU and
        // non-CPU.
        GTEST_SKIP();
    }

    createSimpleAddModel(&mModel);

    switch (kDriverKind) {
        case DriverKind::CPU: {
            // There should be only one driver -- the CPU
            const std::string& name = DeviceManager::get()->getDrivers()[0]->getName();
            ASSERT_TRUE(selectDeviceByName(name));
            break;
        }
        case DriverKind::OLD: {
            static const char name[] = "old";
            DeviceManager::get()->forTest_registerDevice(name, new TestDriver11(name, kSuccess));
            ASSERT_TRUE(selectDeviceByName(name));
            break;
        }
        case DriverKind::NEW: {
            static const char name[] = "new";
            DeviceManager::get()->forTest_registerDevice(name, new TestDriver13(name, kSuccess));
            ASSERT_TRUE(selectDeviceByName(name));
            break;
        }
        default:
            FAIL() << "Unexpected DriverKind";
    }

    EXPECT_EQ(prepareForExecution(true /*measureTiming*/), ANEURALNETWORKS_NO_ERROR);

    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setMeasureTiming(mExecution, true),
              ANEURALNETWORKS_NO_ERROR);

    auto Check = [](bool expectPass, int result) {
        if (expectPass) {
            ASSERT_EQ(result, ANEURALNETWORKS_NO_ERROR);
        } else {
            ASSERT_NE(result, ANEURALNETWORKS_NO_ERROR);
        }
    };

    const bool isPass = hasBit(kSuccess, Success::PASS_BIT);
    const int expectedGetDurationResultCode =
            isPass ? ANEURALNETWORKS_NO_ERROR : ANEURALNETWORKS_BAD_STATE;

    const auto getDurationWhileRunning = [this] {
        if (kDriverKind == DriverKind::CPU) {
            // Testing DriverKind::CPU would require modifying the CPU execution
            // path to control execution completion, similarly to how this test
            // case does with TestPreparedModel::dummyExecution(). This does not
            // seem worthwhile -- it's intrusive into the runtime code solely
            // for the sake of testing, and we do not expect that the code paths
            // needed to ensure correct behavior of
            // ANeuralNetworksExecution_getDuration() on a running execution
            // would be any different for CPU than for actual drivers.
            return;
        }
        TestPreparedModelLatest::waitForExecutionToBegin();
        for (int durationCode :
             std::vector{ANEURALNETWORKS_DURATION_ON_HARDWARE, ANEURALNETWORKS_DURATION_IN_DRIVER,
                         ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE,
                         ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER}) {
            uint64_t time;
            // Cannot query duration while execution is running
            EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, durationCode, &time),
                      ANEURALNETWORKS_BAD_STATE);
        }
    };

    switch (kCompute) {
        case Compute::ASYNC: {
            // Ideally what we'd like to do here is
            //
            //   Check(kSuccess != Success::FAIL_LAUNCH,
            //         ANeuralNetworksExecution_startCompute(mExecution, &mEvent));
            //   Check(isPass, ANeuralNetworksEvent_wait(mEvent));
            //
            // However, in the current implementation of the runtime, a launch
            // failure at the HAL level does not show up as a launch failure at
            // the NDK level ("startCompute"): The NNAPI runtime does not call a
            // driver until it (the runtime) begins execution, so a launch
            // failure at the HAL level looks like an execution failure at the
            // NDK level ("wait").
            SCOPED_TRACE("ASYNC startCompute");
            TestPreparedModelLatest::pauseExecutions(true);
            Check(true,  // rather than kSuccess != Success::FAIL_LAUNCH
                  ANeuralNetworksExecution_startCompute(mExecution, &mEvent));
            getDurationWhileRunning();
            TestPreparedModelLatest::pauseExecutions(false);
            SCOPED_TRACE("ASYNC wait");
            Check(isPass, ANeuralNetworksEvent_wait(mEvent));
            break;
        }
        case Compute::SYNC: {
            SCOPED_TRACE("SYNC");
            TestPreparedModelLatest::pauseExecutions(true);
            std::thread run([this, Check, isPass] {
                Check(isPass, ANeuralNetworksExecution_compute(mExecution));
            });
            getDurationWhileRunning();
            TestPreparedModelLatest::pauseExecutions(false);
            run.join();
            break;
        }
        case Compute::BURST: {
            SCOPED_TRACE("BURST");
            ANeuralNetworksBurst* burst;
            ASSERT_EQ(ANeuralNetworksBurst_create(mCompilation, &burst), ANEURALNETWORKS_NO_ERROR);
            TestPreparedModelLatest::pauseExecutions(true);
            std::thread run([this, Check, isPass, burst] {
                Check(isPass, ANeuralNetworksExecution_burstCompute(mExecution, burst));
            });
            getDurationWhileRunning();
            TestPreparedModelLatest::pauseExecutions(false);
            run.join();
            ANeuralNetworksBurst_free(burst);
            break;
        }
        case Compute::FENCED: {
            SCOPED_TRACE("FENCED startComputeWithDependencies");
            TestPreparedModelLatest::pauseExecutions(true);

            // Note: due to a limitation of the SampleDriver implementation, the call is
            // synchronous. If the SampleDriver is updated to return a real sync fence,
            // this must be updated.
            std::thread run([this, Check, isPass] {
                Check(isPass, ANeuralNetworksExecution_startComputeWithDependencies(
                                      mExecution, nullptr, 0, 0, &mEvent));
            });
            getDurationWhileRunning();
            TestPreparedModelLatest::pauseExecutions(false);
            run.join();
            SCOPED_TRACE("FENCED wait");
            Check(isPass, ANeuralNetworksEvent_wait(mEvent));
            break;
        }
        default:
            FAIL() << "unreachable";
    }

    uint64_t timeOnHardware, timeInDriver, timeOnHardwareFenced, timeInDriverFenced;
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
                                                   &timeOnHardware),
              expectedGetDurationResultCode);
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(mExecution, ANEURALNETWORKS_DURATION_IN_DRIVER,
                                                   &timeInDriver),
              expectedGetDurationResultCode);
    EXPECT_EQ(
            ANeuralNetworksExecution_getDuration(
                    mExecution, ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE, &timeOnHardwareFenced),
            expectedGetDurationResultCode);
    EXPECT_EQ(ANeuralNetworksExecution_getDuration(
                      mExecution, ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER, &timeInDriverFenced),
              expectedGetDurationResultCode);
    switch (kDriverKind) {
        case DriverKind::CPU: {
            // TODO: Should we require timing to be reported as 0?
            EXPECT_TRUE(timeOnHardware == 0 || timeOnHardware == UINT64_MAX)
                    << "timeOnHardware = " << timeOnHardware;
            EXPECT_TRUE(timeInDriver == 0 || timeInDriver == UINT64_MAX)
                    << "timeInDriver = " << timeInDriver;
            EXPECT_TRUE(timeOnHardwareFenced == 0 || timeOnHardwareFenced == UINT64_MAX)
                    << "timeOnHardwareFenced = " << timeOnHardwareFenced;
            EXPECT_TRUE(timeInDriverFenced == 0 || timeInDriverFenced == UINT64_MAX)
                    << "timeInDriverFenced = " << timeInDriverFenced;
            break;
        }
        case DriverKind::OLD: {
            EXPECT_EQ(timeOnHardware, UINT64_MAX);
            EXPECT_EQ(timeInDriver, UINT64_MAX);
            EXPECT_EQ(timeOnHardwareFenced, UINT64_MAX);
            EXPECT_EQ(timeInDriverFenced, UINT64_MAX);
            break;
        }
        case DriverKind::NEW: {
            auto microsToNanos = [](uint64_t micros) {
                constexpr uint64_t kNanosPerMicro = 1000;
                return micros == UINT64_MAX ? UINT64_MAX : kNanosPerMicro * micros;
            };
            auto expectedTiming = getExpectedTiming(kSuccess, kCompute == Compute::FENCED);
            EXPECT_EQ(timeOnHardware, microsToNanos(expectedTiming.first.timeOnDevice));
            EXPECT_EQ(timeInDriver, microsToNanos(expectedTiming.first.timeInDriver));
            EXPECT_EQ(timeOnHardwareFenced, microsToNanos(expectedTiming.second.timeOnDevice));
            EXPECT_EQ(timeInDriverFenced, microsToNanos(expectedTiming.second.timeInDriver));
            break;
        }
        default:
            FAIL() << "unreachable";
    }
    if (kCompute != Compute::FENCED) {
        EXPECT_EQ(timeOnHardware, timeOnHardwareFenced);
        EXPECT_EQ(timeInDriver, timeInDriverFenced);
    }
    auto expectTimingLe = [](uint64_t a, const char* aName, uint64_t b, const char* bName) {
        if (a != UINT64_MAX && b != UINT64_MAX) {
            EXPECT_LE(a, b) << aName << " exceeds " << bName;
        }
    };
#define EXPECT_TIMING_LE(a, b) expectTimingLe(a, #a, b, #b)
    EXPECT_TIMING_LE(timeOnHardware, timeInDriver);
    EXPECT_TIMING_LE(timeOnHardwareFenced, timeInDriverFenced);

    EXPECT_TIMING_LE(timeOnHardwareFenced, timeOnHardware);
    EXPECT_TIMING_LE(timeInDriverFenced, timeInDriver);
#undef EXPECT_TIMING_LE
}

auto kTimingTestUnfencedValues = ::testing::Values(
        // NOTE: We cannot force CPU execution to fail
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::ASYNC),
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::SYNC),
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::BURST),

        // NOTE: OLD driver does not provide timing
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::ASYNC),
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::SYNC),
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::BURST),

        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::ASYNC),
        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::SYNC),
        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::BURST),

        // NOTE: Only ASYNC is paired with a wait
        std::make_tuple(DriverKind::OLD, Success::FAIL_WAIT, Compute::ASYNC),

        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::BURST),

        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::ASYNC),
        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::SYNC),
        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::BURST),

        // NOTE: Only ASYNC is paired with a wait
        std::make_tuple(DriverKind::NEW, Success::FAIL_WAIT, Compute::ASYNC));

auto kTimingTestFencedValues = ::testing::Values(
        // NOTE: We cannot force CPU execution to fail
        std::make_tuple(DriverKind::CPU, Success::PASS_CPU, Compute::FENCED),

        // NOTE: OLD driver does not provide timing
        std::make_tuple(DriverKind::OLD, Success::PASS_NEITHER, Compute::FENCED),

        std::make_tuple(DriverKind::OLD, Success::FAIL_LAUNCH, Compute::FENCED),

        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_NEITHER_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DEVICE_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_DRIVER_BOTH, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_DEVICE, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_DRIVER, Compute::FENCED),
        std::make_tuple(DriverKind::NEW, Success::PASS_BOTH_BOTH, Compute::FENCED),

        std::make_tuple(DriverKind::NEW, Success::FAIL_LAUNCH, Compute::FENCED));

INSTANTIATE_TEST_CASE_P(Unfenced, TimingTest, kTimingTestUnfencedValues);
INSTANTIATE_TEST_CASE_P(Fenced, TimingTest, kTimingTestFencedValues);

}  // namespace timing_tests

/*-- End timing tests -------------------------------------------------------------------------*/

const float kSimpleCeiling = 2.0f;

void createAddMaxModel(WrapperModel* model, bool reverseOrder) {
    WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
    WrapperOperandType type1(WrapperType::INT32, {});
    // Phase 1, operands
    auto op1 = model->addOperand(&type0);
    auto op2 = model->addOperand(&type0);
    auto act = model->addOperand(&type1);
    auto op3 = model->addOperand(&type0);
    auto op4 = model->addOperand(&type0);
    auto op5 = model->addOperand(&type0);
    // Phase 2, operations
    static int32_t act_init[] = {0};
    model->setOperandValue(act, act_init, sizeof(act_init));
    static float ceiling[] = {kSimpleCeiling, kSimpleCeiling};
    model->setOperandValue(op4, ceiling, sizeof(ceiling));
    if (reverseOrder) {
        // In this case, add MAXIMUM first, but the execution order is still ADD -> MAXIMUM.
        model->addOperation(ANEURALNETWORKS_MAXIMUM, {op3, op4}, {op5});
        model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
    } else {
        model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
        model->addOperation(ANEURALNETWORKS_MAXIMUM, {op3, op4}, {op5});
    }
    // Phase 3, inputs and outputs
    model->identifyInputsAndOutputs({op1, op2}, {op5});
    model->finish();
    ASSERT_TRUE(model->isValid());
}
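
// (Added note) The graph above computes, elementwise,
// op5 = max(op1 + op2, kSimpleCeiling); the two Slicing* tests below only
// query which of the two operations the registered driver supports, in the
// order the operations were added.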

TEST_F(IntrospectionControlTest, SlicingAddMax) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char name[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(name, new TestDriver11(name, Success::PASS_BOTH));
    ASSERT_TRUE(selectDeviceByName(name));

    createAddMaxModel(&mModel, false);
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));
}

TEST_F(IntrospectionControlTest, SlicingMaxAdd) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    using namespace test_drivers;

    static const char name[] = "driver11";
    DeviceManager::get()->forTest_registerDevice(name, new TestDriver11(name, Success::PASS_BOTH));
    ASSERT_TRUE(selectDeviceByName(name));

    createAddMaxModel(&mModel, true);
    EXPECT_TRUE(isSupportedOpListExpected({false, true}));
}

const float kSimpleMultiplier = 2.0f;

void createAddMulModel(WrapperModel* model, bool reverseOrder) {
    WrapperOperandType type0(WrapperType::TENSOR_FLOAT32, {2});
    WrapperOperandType type1(WrapperType::INT32, {});
    // Phase 1, operands
    auto op1 = model->addOperand(&type0);
    auto op2 = model->addOperand(&type0);
    auto act = model->addOperand(&type1);
    auto op3 = model->addOperand(&type0);
    auto op4 = model->addOperand(&type0);
    auto op5 = model->addOperand(&type0);
    // Phase 2, operations
    static int32_t act_init[] = {0};
    model->setOperandValue(act, act_init, sizeof(act_init));
    static float multiplier[] = {kSimpleMultiplier, kSimpleMultiplier};
    model->setOperandValue(op4, multiplier, sizeof(multiplier));
    if (reverseOrder) {
        // In this case, add MUL first, but the execution order is still ADD -> MUL.
        model->addOperation(ANEURALNETWORKS_MUL, {op3, op4, act}, {op5});
        model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
    } else {
        model->addOperation(ANEURALNETWORKS_ADD, {op1, op2, act}, {op3});
        model->addOperation(ANEURALNETWORKS_MUL, {op3, op4, act}, {op5});
    }
    // Phase 3, inputs and outputs
    model->identifyInputsAndOutputs({op1, op2}, {op5});
    model->finish();
    ASSERT_TRUE(model->isValid());
}
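
// (Added note) The graph above computes, elementwise,
// op5 = (op1 + op2) * kSimpleMultiplier; the ModelNeedTwoDevices test below
// checks exactly this arithmetic on its outputs.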

// TODO(miaowang): add a test to make sure ANNCompilation_create() has CPU
// fallback.
// This test verifies that a device that can only handle ADD correctly reports that an
// ADD->MUL model cannot be fully supported.
TEST_F(IntrospectionControlTest, PartialModelNotSupported) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, false);

    std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;

    registerDevices({{addOnlyDriver, 0.9, addOnlyOp}});

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    EXPECT_TRUE(isSupportedOpListExpected({true, false}));

    ANeuralNetworksModel* modelHandle = mModel.getHandle();
    EXPECT_EQ(ANeuralNetworksCompilation_createForDevices(modelHandle, mDevices.data(),
                                                          mDevices.size(), &mCompilation),
              ANEURALNETWORKS_NO_ERROR);
    // The compilation must fail as there is no fallback when using
    // the Introspection API.
    EXPECT_NE(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
}

// This test verifies that a device that can only handle ADD correctly reports that an
// ADD->MUL model cannot be fully supported. It also verifies that the indices of the returned
// supported-op list correctly map to the order in which the user added the operations.
TEST_F(IntrospectionControlTest, PartialModelNotSupportedOrder) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, true);

    std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;

    registerDevices({{addOnlyDriver, 0.9, addOnlyOp}});

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    EXPECT_TRUE(isSupportedOpListExpected({false, true}));
}

// TODO(miaowang): update the test to make sure the model is actually running on the test devices.
// This test verifies that an ADD->MUL model is able to run on two selected devices that together
// can handle all operations.
TEST_F(IntrospectionControlTest, ModelNeedTwoDevices) {
    // This is needed before we have the CPU fallback path being treated as a Device.
    if (DeviceManager::get()->getUseCpuOnly()) {
        GTEST_SKIP();
    }

    createAddMulModel(&mModel, false);

    std::string addOnlyDriver = "test-onlyAdd";
    std::vector<bool> addOnlyOp(android::nn::kNumberOfOperationTypes, false);
    addOnlyOp[ANEURALNETWORKS_ADD] = true;

    std::string mulOnlyDriver = "test-onlyMul";
    std::vector<bool> mulOnlyOp(android::nn::kNumberOfOperationTypes, false);
    mulOnlyOp[ANEURALNETWORKS_MUL] = true;

    registerDevices({
            {addOnlyDriver, 0.9, addOnlyOp},
            {mulOnlyDriver, 0.9, mulOnlyOp},
    });

    EXPECT_TRUE(selectDeviceByName(addOnlyDriver));
    EXPECT_TRUE(selectDeviceByName(mulOnlyDriver));
    EXPECT_TRUE(isSupportedOpListExpected({true, true}));
    EXPECT_EQ(prepareForExecution(), ANEURALNETWORKS_NO_ERROR);

    float input1[2] = {1.0f, 2.0f};
    float input2[2] = {3.0f, 4.0f};
    float output[2];
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, input1, sizeof(input1)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 1, nullptr, input2, sizeof(input2)),
              ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, output, sizeof(output)),
              ANEURALNETWORKS_NO_ERROR);

    EXPECT_EQ(ANeuralNetworksExecution_startCompute(mExecution, &mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(ANeuralNetworksEvent_wait(mEvent), ANEURALNETWORKS_NO_ERROR);
    EXPECT_EQ(output[0], kSimpleMultiplier * (input1[0] + input2[0]));
    EXPECT_EQ(output[1], kSimpleMultiplier * (input1[1] + input2[1]));
}
}  // namespace
1304