/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Provides C++ classes to more easily use the Neural Networks API.
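//
// The wrapper objects mirror the lifecycle of the underlying NNAPI objects.
// A typical flow, sketched here for orientation only (the exact operands and
// operations depend on the model being built), is:
//
//     Model -> Compilation -> Execution
//
// i.e. build and finish() a Model, compile it with a Compilation, then run it
// one or more times through Execution objects. Usage sketches appear in the
// comments further below.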

#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H

#include "NeuralNetworks.h"

#include <assert.h>
#include <math.h>
#include <algorithm>
#include <optional>
#include <string>
#include <utility>
#include <vector>

namespace android {
namespace nn {
namespace wrapper {

enum class Type {
    FLOAT32 = ANEURALNETWORKS_FLOAT32,
    INT32 = ANEURALNETWORKS_INT32,
    UINT32 = ANEURALNETWORKS_UINT32,
    TENSOR_FLOAT32 = ANEURALNETWORKS_TENSOR_FLOAT32,
    TENSOR_INT32 = ANEURALNETWORKS_TENSOR_INT32,
    TENSOR_QUANT8_ASYMM = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
    BOOL = ANEURALNETWORKS_BOOL,
    TENSOR_QUANT16_SYMM = ANEURALNETWORKS_TENSOR_QUANT16_SYMM,
    TENSOR_FLOAT16 = ANEURALNETWORKS_TENSOR_FLOAT16,
    TENSOR_BOOL8 = ANEURALNETWORKS_TENSOR_BOOL8,
    FLOAT16 = ANEURALNETWORKS_FLOAT16,
    TENSOR_QUANT8_SYMM_PER_CHANNEL = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL,
    TENSOR_QUANT16_ASYMM = ANEURALNETWORKS_TENSOR_QUANT16_ASYMM,
    TENSOR_QUANT8_SYMM = ANEURALNETWORKS_TENSOR_QUANT8_SYMM,
    MODEL = ANEURALNETWORKS_MODEL,
};

enum class ExecutePreference {
    PREFER_LOW_POWER = ANEURALNETWORKS_PREFER_LOW_POWER,
    PREFER_FAST_SINGLE_ANSWER = ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER,
    PREFER_SUSTAINED_SPEED = ANEURALNETWORKS_PREFER_SUSTAINED_SPEED
};

enum class ExecutePriority {
    LOW = ANEURALNETWORKS_PRIORITY_LOW,
    MEDIUM = ANEURALNETWORKS_PRIORITY_MEDIUM,
    HIGH = ANEURALNETWORKS_PRIORITY_HIGH,
    DEFAULT = ANEURALNETWORKS_PRIORITY_DEFAULT,
};

enum class Result {
    NO_ERROR = ANEURALNETWORKS_NO_ERROR,
    OUT_OF_MEMORY = ANEURALNETWORKS_OUT_OF_MEMORY,
    INCOMPLETE = ANEURALNETWORKS_INCOMPLETE,
    UNEXPECTED_NULL = ANEURALNETWORKS_UNEXPECTED_NULL,
    BAD_DATA = ANEURALNETWORKS_BAD_DATA,
    OP_FAILED = ANEURALNETWORKS_OP_FAILED,
    UNMAPPABLE = ANEURALNETWORKS_UNMAPPABLE,
    BAD_STATE = ANEURALNETWORKS_BAD_STATE,
    OUTPUT_INSUFFICIENT_SIZE = ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE,
    UNAVAILABLE_DEVICE = ANEURALNETWORKS_UNAVAILABLE_DEVICE,
    MISSED_DEADLINE_TRANSIENT = ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT,
    MISSED_DEADLINE_PERSISTENT = ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT,
};

struct SymmPerChannelQuantParams {
    ANeuralNetworksSymmPerChannelQuantParams params;
    std::vector<float> scales;

    SymmPerChannelQuantParams(std::vector<float> scalesVec, uint32_t channelDim)
        : scales(std::move(scalesVec)) {
        params = {
                .channelDim = channelDim,
                .scaleCount = static_cast<uint32_t>(scales.size()),
                .scales = scales.size() > 0 ? scales.data() : nullptr,
        };
    }

    SymmPerChannelQuantParams(const SymmPerChannelQuantParams& other)
        : params(other.params), scales(other.scales) {
        params.scales = scales.size() > 0 ? scales.data() : nullptr;
    }

    SymmPerChannelQuantParams& operator=(const SymmPerChannelQuantParams& other) {
        if (this != &other) {
            params = other.params;
            scales = other.scales;
            params.scales = scales.size() > 0 ? scales.data() : nullptr;
        }
        return *this;
    }
};

struct OperandType {
    ANeuralNetworksOperandType operandType;
    std::vector<uint32_t> dimensions;
    std::optional<SymmPerChannelQuantParams> channelQuant;

    OperandType(const OperandType& other)
        : operandType(other.operandType),
          dimensions(other.dimensions),
          channelQuant(other.channelQuant) {
        operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr;
    }

    OperandType& operator=(const OperandType& other) {
        if (this != &other) {
            operandType = other.operandType;
            dimensions = other.dimensions;
            channelQuant = other.channelQuant;
            operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr;
        }
        return *this;
    }

    OperandType(Type type, std::vector<uint32_t> d, float scale = 0.0f, int32_t zeroPoint = 0)
        : dimensions(std::move(d)), channelQuant(std::nullopt) {
        operandType = {
                .type = static_cast<int32_t>(type),
                .dimensionCount = static_cast<uint32_t>(dimensions.size()),
                .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr,
                .scale = scale,
                .zeroPoint = zeroPoint,
        };
    }

    OperandType(Type type, std::vector<uint32_t> data, SymmPerChannelQuantParams&& channelQuant)
        : dimensions(std::move(data)), channelQuant(std::move(channelQuant)) {
        assert(type == Type::TENSOR_QUANT8_SYMM_PER_CHANNEL);

        operandType = {
                .type = static_cast<int32_t>(type),
                .dimensionCount = static_cast<uint32_t>(dimensions.size()),
                .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr,
                .scale = 0.0f,
                .zeroPoint = 0,
        };
    }
};
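
// A minimal sketch of constructing OperandType instances with the
// constructors above. The shapes, scales, and zero points are illustrative
// placeholders, not values required by the API:
//
//     using namespace android::nn::wrapper;
//
//     // A scalar INT32 operand (e.g. a fused activation code).
//     OperandType scalarType(Type::INT32, {});
//
//     // A quantized 8-bit tensor: scale and zeroPoint describe the
//     // quantization of the underlying uint8 data.
//     OperandType quantType(Type::TENSOR_QUANT8_ASYMM, {1, 224, 224, 3}, 0.5f, 128);
//
//     // A per-channel symmetric quantized tensor: one scale per channel
//     // along channelDim, wrapped in SymmPerChannelQuantParams.
//     OperandType perChannelType(Type::TENSOR_QUANT8_SYMM_PER_CHANNEL, {4, 3, 3, 3},
//                                SymmPerChannelQuantParams({0.1f, 0.2f, 0.3f, 0.4f},
//                                                          /*channelDim=*/0));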

class Memory {
   public:
    Memory(size_t size, int protect, int fd, size_t offset) {
        mValid = ANeuralNetworksMemory_createFromFd(size, protect, fd, offset, &mMemory) ==
                 ANEURALNETWORKS_NO_ERROR;
    }

    Memory(AHardwareBuffer* buffer) {
        mValid = ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &mMemory) ==
                 ANEURALNETWORKS_NO_ERROR;
    }

    ~Memory() { ANeuralNetworksMemory_free(mMemory); }

    // Disallow copy semantics to ensure the runtime object can only be freed
    // once. Copy semantics could be enabled if some sort of reference counting
    // or deep-copy system for runtime objects is added later.
    Memory(const Memory&) = delete;
    Memory& operator=(const Memory&) = delete;

    // Move semantics to remove access to the runtime object from the wrapper
    // object that is being moved. This ensures the runtime object will be
    // freed only once.
    Memory(Memory&& other) { *this = std::move(other); }
    Memory& operator=(Memory&& other) {
        if (this != &other) {
            ANeuralNetworksMemory_free(mMemory);
            mMemory = other.mMemory;
            mValid = other.mValid;
            other.mMemory = nullptr;
            other.mValid = false;
        }
        return *this;
    }

    ANeuralNetworksMemory* get() const { return mMemory; }
    bool isValid() const { return mValid; }

   private:
    ANeuralNetworksMemory* mMemory = nullptr;
    bool mValid = true;
};

class Model {
   public:
    Model() {
        // TODO handle the value returned by this call
        ANeuralNetworksModel_create(&mModel);
    }
    ~Model() { ANeuralNetworksModel_free(mModel); }

    // Disallow copy semantics to ensure the runtime object can only be freed
    // once. Copy semantics could be enabled if some sort of reference counting
    // or deep-copy system for runtime objects is added later.
    Model(const Model&) = delete;
    Model& operator=(const Model&) = delete;

    // Move semantics to remove access to the runtime object from the wrapper
    // object that is being moved. This ensures the runtime object will be
    // freed only once.
    Model(Model&& other) { *this = std::move(other); }
    Model& operator=(Model&& other) {
        if (this != &other) {
            ANeuralNetworksModel_free(mModel);
            mModel = other.mModel;
            mNextOperandId = other.mNextOperandId;
            mValid = other.mValid;
            other.mModel = nullptr;
            other.mNextOperandId = 0;
            other.mValid = false;
        }
        return *this;
    }

    Result finish() {
        if (mValid) {
            auto result = static_cast<Result>(ANeuralNetworksModel_finish(mModel));
            if (result != Result::NO_ERROR) {
                mValid = false;
            }
            return result;
        } else {
            return Result::BAD_STATE;
        }
    }

    uint32_t addOperand(const OperandType* type) {
        if (ANeuralNetworksModel_addOperand(mModel, &(type->operandType)) !=
            ANEURALNETWORKS_NO_ERROR) {
            mValid = false;
        }
        if (type->channelQuant) {
            if (ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
                        mModel, mNextOperandId, &type->channelQuant.value().params) !=
                ANEURALNETWORKS_NO_ERROR) {
                mValid = false;
            }
        }
        return mNextOperandId++;
    }

    void setOperandValue(uint32_t index, const void* buffer, size_t length) {
        if (ANeuralNetworksModel_setOperandValue(mModel, index, buffer, length) !=
            ANEURALNETWORKS_NO_ERROR) {
            mValid = false;
        }
    }

    void setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
                                   size_t length) {
        if (ANeuralNetworksModel_setOperandValueFromMemory(mModel, index, memory->get(), offset,
                                                           length) != ANEURALNETWORKS_NO_ERROR) {
            mValid = false;
        }
    }

    void addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
                      const std::vector<uint32_t>& outputs) {
        if (ANeuralNetworksModel_addOperation(mModel, type, static_cast<uint32_t>(inputs.size()),
                                              inputs.data(), static_cast<uint32_t>(outputs.size()),
                                              outputs.data()) != ANEURALNETWORKS_NO_ERROR) {
            mValid = false;
        }
    }
    void identifyInputsAndOutputs(const std::vector<uint32_t>& inputs,
                                  const std::vector<uint32_t>& outputs) {
        if (ANeuralNetworksModel_identifyInputsAndOutputs(
                    mModel, static_cast<uint32_t>(inputs.size()), inputs.data(),
                    static_cast<uint32_t>(outputs.size()),
                    outputs.data()) != ANEURALNETWORKS_NO_ERROR) {
            mValid = false;
        }
    }

    void relaxComputationFloat32toFloat16(bool isRelax) {
        if (ANeuralNetworksModel_relaxComputationFloat32toFloat16(mModel, isRelax) ==
            ANEURALNETWORKS_NO_ERROR) {
            mRelaxed = isRelax;
        }
    }

    ANeuralNetworksModel* getHandle() const { return mModel; }
    bool isValid() const { return mValid; }
    bool isRelaxed() const { return mRelaxed; }

   protected:
    ANeuralNetworksModel* mModel = nullptr;
    // We keep track of the operand ID as a convenience to the caller.
    uint32_t mNextOperandId = 0;
    bool mValid = true;
    bool mRelaxed = false;
};
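
// A minimal sketch of building a model with this wrapper. It assumes a single
// ANEURALNETWORKS_ADD operation, whose NNAPI signature takes two input tensors
// plus an INT32 fused-activation scalar and produces one output tensor; the
// shapes and constant values are illustrative placeholders:
//
//     using namespace android::nn::wrapper;
//
//     Model model;
//     OperandType tensorType(Type::TENSOR_FLOAT32, {1, 4});
//     OperandType activationType(Type::INT32, {});
//
//     uint32_t a = model.addOperand(&tensorType);        // first input
//     uint32_t b = model.addOperand(&tensorType);        // second input
//     uint32_t act = model.addOperand(&activationType);  // fused activation
//     uint32_t out = model.addOperand(&tensorType);      // output
//
//     int32_t noActivation = ANEURALNETWORKS_FUSED_NONE;
//     model.setOperandValue(act, &noActivation, sizeof(noActivation));
//
//     model.addOperation(ANEURALNETWORKS_ADD, {a, b, act}, {out});
//     model.identifyInputsAndOutputs({a, b}, {out});
//     if (model.finish() != Result::NO_ERROR || !model.isValid()) {
//         // handle the failure
//     }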

class Event {
   public:
    Event() {}
    ~Event() { ANeuralNetworksEvent_free(mEvent); }

    // Disallow copy semantics to ensure the runtime object can only be freed
    // once. Copy semantics could be enabled if some sort of reference counting
    // or deep-copy system for runtime objects is added later.
    Event(const Event&) = delete;
    Event& operator=(const Event&) = delete;

    // Move semantics to remove access to the runtime object from the wrapper
    // object that is being moved. This ensures the runtime object will be
    // freed only once.
    Event(Event&& other) { *this = std::move(other); }
    Event& operator=(Event&& other) {
        if (this != &other) {
            ANeuralNetworksEvent_free(mEvent);
            mEvent = other.mEvent;
            other.mEvent = nullptr;
        }
        return *this;
    }

    Result wait() { return static_cast<Result>(ANeuralNetworksEvent_wait(mEvent)); }

    // Only for use by Execution
    void set(ANeuralNetworksEvent* newEvent) {
        ANeuralNetworksEvent_free(mEvent);
        mEvent = newEvent;
    }

    // Only for use by Execution
    ANeuralNetworksEvent* getHandle() const { return mEvent; }

   private:
    ANeuralNetworksEvent* mEvent = nullptr;
};

class Compilation {
   public:
    Compilation(const Model* model) {
        int result = ANeuralNetworksCompilation_create(model->getHandle(), &mCompilation);
        if (result != 0) {
            // TODO Handle the error
        }
    }

    ~Compilation() { ANeuralNetworksCompilation_free(mCompilation); }

    // Disallow copy semantics to ensure the runtime object can only be freed
    // once. Copy semantics could be enabled if some sort of reference counting
    // or deep-copy system for runtime objects is added later.
    Compilation(const Compilation&) = delete;
    Compilation& operator=(const Compilation&) = delete;

    // Move semantics to remove access to the runtime object from the wrapper
    // object that is being moved. This ensures the runtime object will be
    // freed only once.
    Compilation(Compilation&& other) { *this = std::move(other); }
    Compilation& operator=(Compilation&& other) {
        if (this != &other) {
            ANeuralNetworksCompilation_free(mCompilation);
            mCompilation = other.mCompilation;
            other.mCompilation = nullptr;
        }
        return *this;
    }

    Result setPreference(ExecutePreference preference) {
        return static_cast<Result>(ANeuralNetworksCompilation_setPreference(
                mCompilation, static_cast<int32_t>(preference)));
    }

    Result setPriority(ExecutePriority priority) {
        return static_cast<Result>(ANeuralNetworksCompilation_setPriority(
                mCompilation, static_cast<int32_t>(priority)));
    }

    Result setCaching(const std::string& cacheDir, const std::vector<uint8_t>& token) {
        if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) {
            return Result::BAD_DATA;
        }
        return static_cast<Result>(ANeuralNetworksCompilation_setCaching(
                mCompilation, cacheDir.c_str(), token.data()));
    }

    Result finish() { return static_cast<Result>(ANeuralNetworksCompilation_finish(mCompilation)); }

    ANeuralNetworksCompilation* getHandle() const { return mCompilation; }

   private:
    ANeuralNetworksCompilation* mCompilation = nullptr;
};

class Execution {
   public:
    Execution(const Compilation* compilation) {
        int result = ANeuralNetworksExecution_create(compilation->getHandle(), &mExecution);
        if (result != 0) {
            // TODO Handle the error
        }
    }

    ~Execution() { ANeuralNetworksExecution_free(mExecution); }

    // Disallow copy semantics to ensure the runtime object can only be freed
    // once. Copy semantics could be enabled if some sort of reference counting
    // or deep-copy system for runtime objects is added later.
    Execution(const Execution&) = delete;
    Execution& operator=(const Execution&) = delete;

    // Move semantics to remove access to the runtime object from the wrapper
    // object that is being moved. This ensures the runtime object will be
    // freed only once.
    Execution(Execution&& other) { *this = std::move(other); }
    Execution& operator=(Execution&& other) {
        if (this != &other) {
            ANeuralNetworksExecution_free(mExecution);
            mExecution = other.mExecution;
            other.mExecution = nullptr;
        }
        return *this;
    }

    Result setInput(uint32_t index, const void* buffer, size_t length,
                    const ANeuralNetworksOperandType* type = nullptr) {
        return static_cast<Result>(
                ANeuralNetworksExecution_setInput(mExecution, index, type, buffer, length));
    }

    Result setInputFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
                              uint32_t length, const ANeuralNetworksOperandType* type = nullptr) {
        return static_cast<Result>(ANeuralNetworksExecution_setInputFromMemory(
                mExecution, index, type, memory->get(), offset, length));
    }

    Result setOutput(uint32_t index, void* buffer, size_t length,
                     const ANeuralNetworksOperandType* type = nullptr) {
        return static_cast<Result>(
                ANeuralNetworksExecution_setOutput(mExecution, index, type, buffer, length));
    }

    Result setOutputFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
                               uint32_t length, const ANeuralNetworksOperandType* type = nullptr) {
        return static_cast<Result>(ANeuralNetworksExecution_setOutputFromMemory(
                mExecution, index, type, memory->get(), offset, length));
    }

    Result startCompute(Event* event) {
        ANeuralNetworksEvent* ev = nullptr;
        Result result = static_cast<Result>(ANeuralNetworksExecution_startCompute(mExecution, &ev));
        event->set(ev);
        return result;
    }

    Result startComputeWithDependencies(const std::vector<const Event*>& dependencies,
                                        uint64_t duration, Event* event) {
        std::vector<const ANeuralNetworksEvent*> deps(dependencies.size());
        std::transform(dependencies.begin(), dependencies.end(), deps.begin(),
                       [](const Event* e) { return e->getHandle(); });
        ANeuralNetworksEvent* ev = nullptr;
        Result result = static_cast<Result>(ANeuralNetworksExecution_startComputeWithDependencies(
                mExecution, deps.data(), deps.size(), duration, &ev));
        event->set(ev);
        return result;
    }

    Result compute() { return static_cast<Result>(ANeuralNetworksExecution_compute(mExecution)); }

    Result getOutputOperandDimensions(uint32_t index, std::vector<uint32_t>* dimensions) {
        uint32_t rank = 0;
        Result result = static_cast<Result>(
                ANeuralNetworksExecution_getOutputOperandRank(mExecution, index, &rank));
        dimensions->resize(rank);
        if ((result != Result::NO_ERROR && result != Result::OUTPUT_INSUFFICIENT_SIZE) ||
            rank == 0) {
            return result;
        }
        result = static_cast<Result>(ANeuralNetworksExecution_getOutputOperandDimensions(
                mExecution, index, dimensions->data()));
        return result;
    }

   private:
    ANeuralNetworksExecution* mExecution = nullptr;
};
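
// A minimal sketch of compiling and running a finished Model, continuing the
// model-building sketch above. Buffer sizes and contents are illustrative
// placeholders:
//
//     Compilation compilation(&model);
//     compilation.setPreference(ExecutePreference::PREFER_FAST_SINGLE_ANSWER);
//     if (compilation.finish() != Result::NO_ERROR) {
//         // handle the failure
//     }
//
//     float in0[4] = {1, 2, 3, 4};
//     float in1[4] = {5, 6, 7, 8};
//     float out[4] = {};
//
//     Execution execution(&compilation);
//     execution.setInput(0, in0, sizeof(in0));
//     execution.setInput(1, in1, sizeof(in1));
//     execution.setOutput(0, out, sizeof(out));
//
//     // Run synchronously ...
//     Result result = execution.compute();
//
//     // ... or, on a fresh Execution, start asynchronously and wait on the
//     // returned Event:
//     //     Event event;
//     //     execution.startCompute(&event);
//     //     event.wait();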

}  // namespace wrapper
}  // namespace nn
}  // namespace android

#endif  //  ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H