1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_OPERATIONS_UTILS_H
18 #define ANDROID_FRAMEWORKS_ML_NN_COMMON_OPERATIONS_UTILS_H
19 
20 #include <algorithm>
21 #include <cstdint>
22 #include <vector>
23 
24 #include "HalInterfaces.h"
25 #include "Utils.h"
26 
27 namespace android {
28 namespace nn {
29 
// DEPRECATED. Use NN_RET_CHECK instead.
#define NN_CHECK(x) NN_RET_CHECK(x)
#define NN_OPS_CHECK(x) NN_RET_CHECK(x)

// DEPRECATED. Use NN_RET_CHECK_EQ instead.
#define NN_CHECK_EQ(x, y) NN_RET_CHECK_EQ(x, y)

// An 8-bit boolean type (sizeof(bool) is implementation-defined).
typedef uint8_t bool8;

// Implicit padding schemes for convolution-style operations.
// See getPaddingScheme() below for how explicit padding values are mapped
// back onto these schemes.
enum PaddingScheme {
    kPaddingUnknown = 0,  // Explicit padding matching neither SAME nor VALID.
    kPaddingSame = 1,     // "SAME" implicit padding.
    kPaddingValid = 2,    // "VALID" implicit padding (all-zero explicit padding).
};
45 
// Stores operand type information. "Shape" is a historical name.
struct Shape {
    hal::OperandType type = hal::OperandType::FLOAT32;
    std::vector<uint32_t> dimensions;
    // Quantization parameters; presumably meaningful only for quantized
    // operand types — confirm against the Operand definition in the HAL.
    float scale = 0.0f;
    int32_t offset = 0;
    // Additional operand type parameters (see hal::OperandExtraParams).
    hal::OperandExtraParams extraParams;
};
54 
// Provides information available during graph creation to validate an operation.
class IOperationValidationContext {
   public:
    virtual ~IOperationValidationContext() {}

    // Returns the name of the operation being validated.
    virtual const char* getOperationName() const = 0;

    // The HAL version of the environment in which the operation is to be
    // executed.
    //
    // Operation validation logic needs to handle all HAL versions to support
    // the following use cases (assume in these examples that the latest HAL
    // version is V1_2):
    // 1. Our runtime wants to distribute work to a driver implementing an older
    //    HAL version and calls, for example,
    //    compliantWithV1_0(const V1_2::Model&).
    // 2. A driver implements an older HAL version and delegates model
    //    validation to, for example, validateModel(const V1_0::Model&).
    //
    // If getHalVersion() returns HalVersion::V1_0 and the operation
    // is only supported since HalVersion::V1_1, validation will fail.
    virtual HalVersion getHalVersion() const = 0;

    // Input accessors: count, operand type, shape, and extra type parameters
    // of the input at the given index.
    virtual uint32_t getNumInputs() const = 0;
    virtual hal::OperandType getInputType(uint32_t index) const = 0;
    virtual Shape getInputShape(uint32_t index) const = 0;
    virtual const hal::OperandExtraParams getInputExtraParams(uint32_t index) const = 0;

    // Output accessors: count, operand type, and shape of the output at the
    // given index.
    virtual uint32_t getNumOutputs() const = 0;
    virtual hal::OperandType getOutputType(uint32_t index) const = 0;
    virtual Shape getOutputShape(uint32_t index) const = 0;
};
87 
// Provides inputs and outputs during operation execution.
class IOperationExecutionContext {
   public:
    virtual ~IOperationExecutionContext() {}

    // Input accessors: count, operand type, shape, raw data buffer, and extra
    // type parameters of the input at the given index.
    virtual uint32_t getNumInputs() const = 0;
    virtual hal::OperandType getInputType(uint32_t index) const = 0;
    virtual Shape getInputShape(uint32_t index) const = 0;
    virtual const void* getInputBuffer(uint32_t index) const = 0;
    virtual const hal::OperandExtraParams getInputExtraParams(uint32_t index) const = 0;

    // Output accessors: count, operand type, shape, and mutable data buffer
    // of the output at the given index.
    virtual uint32_t getNumOutputs() const = 0;
    virtual hal::OperandType getOutputType(uint32_t index) const = 0;
    virtual Shape getOutputShape(uint32_t index) const = 0;
    virtual void* getOutputBuffer(uint32_t index) = 0;

    // Updates the output shape, allocating the buffer if necessary.
    virtual bool setOutputShape(uint32_t index, const Shape& shape) = 0;

    // Whether the input/output at the given index is omitted (e.g. an optional
    // operand that was not provided) — exact semantics defined by the
    // implementing class.
    virtual bool isOmittedInput(uint32_t index) const = 0;
    virtual bool isOmittedOutput(uint32_t index) const = 0;

    // Typed view of the raw input buffer. The caller is responsible for T
    // matching the operand's actual data type; this is an unchecked cast.
    template <typename T>
    const T* getInputBuffer(uint32_t index) const {
        return reinterpret_cast<const T*>(getInputBuffer(index));
    }

    // Typed view of the raw output buffer; same caveat as getInputBuffer<T>.
    template <typename T>
    T* getOutputBuffer(uint32_t index) {
        return reinterpret_cast<T*>(getOutputBuffer(index));
    }

    // Reads the first element of input `index` as a scalar of type T.
    template <typename T>
    T getInputValue(uint32_t index) const {
        return getInputBuffer<T>(index)[0];
    }
};
125 
// Verifies that the number and types of operation inputs are as expected.
bool validateInputTypes(const IOperationValidationContext* context,
                        const std::vector<hal::OperandType>& expectedTypes);

// Verifies that the number and types of operation outputs are as expected.
bool validateOutputTypes(const IOperationValidationContext* context,
                         const std::vector<hal::OperandType>& expectedTypes);

// Verifies that the HAL version specified in the context is greater or equal
// than the minimal supported HAL version.
bool validateHalVersion(const IOperationValidationContext* context,
                        HalVersion minSupportedHalVersion);

// Verifies that the two shapes are the same.
bool SameShape(const Shape& in1, const Shape& in2);

// Sets out to the same shape as in.
bool SetShape(const Shape& in, Shape* out);

// Return the total number of elements, i.e. all the dimensions multiplied
// together. For a scalar, returns one.
uint32_t getNumberOfElements(const Shape& shape);
// Same, but restricted (per the parameter names) to the axis range
// [firstAxisInclusive, lastAxisExclusive).
uint32_t getNumberOfElements(const Shape& shape, size_t firstAxisInclusive,
                             size_t lastAxisExclusive);

// Returns the rank (number of dimensions) of the shape.
uint32_t getNumberOfDimensions(const Shape& shape);

// Returns the size of the dimension at the given index.
uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx);

// NOTE(review): return type is uint32_t although the name suggests a boolean;
// confirm intended usage against the definition.
uint32_t hasKnownRank(const Shape& shape);

// Converts an axis index from the range [-dims, dims) into the range [0, dims).
bool handleNegativeAxis(int32_t numberOfDimensions, int32_t* axis);
159 
handleNegativeAxis(const Shape & shape,int32_t * axis)160 inline bool handleNegativeAxis(const Shape& shape, int32_t* axis) {
161     return handleNegativeAxis(getNumberOfDimensions(shape), axis);
162 }
163 
// Computes the output spatial size of a convolution/pooling window given the
// (already padded) input extent, filter size, and stride.
inline int32_t computeOutSize(int32_t imageSize, int32_t filterSize, int32_t stride,
                              int32_t paddingHead, int32_t paddingTail) {
    const int32_t paddedSize = imageSize + paddingHead + paddingTail;
    return (paddedSize - filterSize + stride) / stride;
}
168 
// Dilated variant: the filter's effective extent grows with the dilation rate
// before the standard output-size formula is applied.
inline int32_t computeOutSize(int32_t imageSize, int32_t filterSize, int32_t stride,
                              int32_t dilationRate, int32_t paddingHead, int32_t paddingTail) {
    const int32_t effectiveFilterSize = (filterSize - 1) * dilationRate + 1;
    const int32_t paddedSize = imageSize + paddingHead + paddingTail;
    return (paddedSize - effectiveFilterSize + stride) / stride;
}
174 
// Output size for transposed convolution: the input is upsampled by the
// stride, extended by the filter, then trimmed by the explicit padding.
inline int32_t computeOutSizeTransposeConv(int32_t imageSize, int32_t filterSize, int32_t stride,
                                           int32_t paddingHead, int32_t paddingTail) {
    // (imageSize - 1) * stride == imageSize * stride - stride.
    const int32_t upsampledSize = (imageSize - 1) * stride;
    return upsampledSize + filterSize - paddingHead - paddingTail;
}
179 
// Quantization helpers. __wur (warn-unused-result) forces callers to check
// the returned success flag.
__wur bool QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
                              int32_t* shift);

__wur bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t* quantized_multiplier,
                                            int32_t* right_shift);

// Same as QuantizeMultiplierSmallerThanOne but returns left shift (i.e. negated
// right shift), so that it has the same interface as
// QuantizeMultiplierGreaterThanOne and QuantizeMultiplier functions.
__wur bool QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
                                               int32_t* quantized_multiplier, int32_t* left_shift);

__wur bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t* quantized_multiplier,
                                            int* left_shift);

// Computes the real multiplier for a quantized convolution from the scales of
// input, filter, bias, and output shapes — semantics per the definition.
__wur bool GetQuantizedConvolutionMultipler(const Shape& inputShape, const Shape& filterShape,
                                            const Shape& biasShape, const Shape& outputShape,
                                            double* multiplier);

// Computes the [act_min, act_max] clamping range implied by the fused
// activation code for the respective output type.
void CalculateActivationRangeUint8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                   int32_t* act_max);

void CalculateActivationRangeInt8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                  int32_t* act_max);

void CalculateActivationRangeFloat(int32_t activation, float* activation_min,
                                   float* activation_max);

int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift);

// Shared worker for the calculateExplicitPadding* wrappers below;
// padding_implicit is a PaddingScheme value.
void calculateExplicitPaddingImpl(int32_t in_size, int32_t stride, int32_t dilation_factor,
                                  int32_t filter_size, int32_t padding_implicit,
                                  bool isTransposeConv, int32_t* padding_head,
                                  int32_t* padding_tail);
214 
// Computes explicit head/tail padding for a regular (non-transposed)
// convolution or pooling window with the given dilation factor.
// padding_implicit is a PaddingScheme value.
inline void calculateExplicitPadding(int32_t in_size, int32_t stride, int32_t dilation_factor,
                                     int32_t filter_size, int32_t padding_implicit,
                                     int32_t* padding_head, int32_t* padding_tail) {
    calculateExplicitPaddingImpl(in_size, stride, dilation_factor, filter_size, padding_implicit,
                                 /*isTransposeConv=*/false, padding_head, padding_tail);
}
221 
// Convenience overload for the common undilated case (dilation factor 1).
inline void calculateExplicitPadding(int32_t in_size, int32_t stride, int32_t filter_size,
                                     int32_t padding_implicit, int32_t* padding_head,
                                     int32_t* padding_tail) {
    calculateExplicitPadding(in_size, stride, 1, filter_size, padding_implicit, padding_head,
                             padding_tail);
}
228 
// Computes explicit head/tail padding for a transposed convolution
// (undilated). padding_implicit is a PaddingScheme value.
inline void calculateExplicitPaddingTransposeConv(int32_t in_size, int32_t stride,
                                                  int32_t filter_size, int32_t padding_implicit,
                                                  int32_t* padding_head, int32_t* padding_tail) {
    calculateExplicitPaddingImpl(in_size, stride, /*dilation_factor=*/1, filter_size,
                                 padding_implicit, /*isTransposeConv=*/true, padding_head,
                                 padding_tail);
}
236 
getPaddingScheme(int32_t inWidth,int32_t inHeight,int32_t strideWidth,int32_t strideHeight,int32_t filterWidth,int32_t filterHeight,int32_t paddingLeft,int32_t paddingRight,int32_t paddingTop,int32_t paddingBottom)237 inline PaddingScheme getPaddingScheme(int32_t inWidth, int32_t inHeight, int32_t strideWidth,
238                                       int32_t strideHeight, int32_t filterWidth,
239                                       int32_t filterHeight, int32_t paddingLeft,
240                                       int32_t paddingRight, int32_t paddingTop,
241                                       int32_t paddingBottom) {
242     if (paddingLeft == 0 && paddingRight == 0 && paddingTop == 0 && paddingBottom == 0) {
243         return kPaddingValid;
244     }
245 
246     int32_t expectedPaddingLeft, expectedPaddingRight;
247     int32_t expectedPaddingTop, expectedPaddingBottom;
248 
249     calculateExplicitPadding(inWidth, strideWidth, filterWidth, kPaddingSame, &expectedPaddingLeft,
250                              &expectedPaddingRight);
251     calculateExplicitPadding(inHeight, strideHeight, filterHeight, kPaddingSame,
252                              &expectedPaddingTop, &expectedPaddingBottom);
253     if (expectedPaddingLeft == paddingLeft && expectedPaddingRight == paddingRight &&
254         expectedPaddingTop == paddingTop && expectedPaddingBottom == paddingBottom) {
255         return kPaddingSame;
256     } else {
257         return kPaddingUnknown;
258     }
259 }
260 
// Reverse order of bits in the mask to match the expected order in kernel
inline int ReverseMaskBits(int mask, int num_dimensions) {
    int reversed = 0;
    for (int i = 0; i < num_dimensions; ++i) {
        // Shift the accumulated result left and append the next low bit.
        reversed = (reversed << 1) | (mask & 1);
        mask >>= 1;
    }
    return reversed;
}
271 
// Compute the positive remainder of dividend modulo divisor; the result is
// always in [0, divisor) even when dividend is negative.
inline int32_t PositiveRemainder(int32_t dividend, int32_t divisor) {
    const int32_t rem = dividend % divisor;
    return (rem + divisor) % divisor;
}

// Compute clamped index: restricts `index` to the valid range for a slice
// over a dimension of size `dim`, with the out-of-range sentinel depending on
// the stride direction.
inline int32_t ClampedIndex(int32_t index, int dim, bool pos_stride) {
    if (pos_stride) {
        // Positive stride: indices at or past the end saturate to dim.
        if (index >= dim) {
            return dim;
        }
        return PositiveRemainder(std::min(std::max(index, -dim), dim), dim);
    }
    // Negative stride: indices before the start map to the sentinel -1.
    if (index < -dim) {
        return -1;
    }
    return PositiveRemainder(std::min(std::max(index, -dim), dim - 1), dim);
}
286 
// Broadcasts input shape against one another and puts the result into output
// shape. Returns true on success and false on error.
bool calculateBroadcastedShape(const Shape& in1, const Shape& in2, Shape* out);

// Dequantizes a value and quantizes it back using new scale and offset.
template <typename T>
T requantize(T value, const Shape& oldShape, const Shape& newShape);

// Preparation functions for the corresponding ops. Each validates its input
// shapes and fills in the output Shape(s), returning false on failure.
bool floorPrepare(const Shape& input, Shape* output);

bool depthwiseConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
                          int32_t padding_left, int32_t padding_right, int32_t padding_top,
                          int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
                          int32_t depth_multiplier, int32_t dilation_width_factor,
                          int32_t dilation_height_factor, Shape* output);

bool genericActivationPrepare(const Shape& input, Shape* output);

bool reshapePrepare(const Shape& input, const int32_t* targetDims, const int32_t targetDimsSize,
                    Shape* output);

bool depthToSpacePrepare(const Shape& input, int32_t blockSize, Shape* output);

bool spaceToDepthPrepare(const Shape& input, int32_t blockSize, Shape* output);

bool embeddingLookupPrepare(const Shape& valueShape, const Shape& lookupShape, Shape* outputShape);

bool hashtableLookupPrepare(const Shape& lookupShape, const Shape& keyShape,
                            const Shape& valueShape, Shape* outputShape, Shape* hitShape);

bool padPrepare(const Shape& input, const int32_t* paddingsData, const Shape& paddingsShape,
                Shape* output);

bool batchToSpacePrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, Shape* output);

bool spaceToBatchPrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, const int32_t* paddingsData,
                         const Shape& paddingsShape, Shape* output);

bool meanPrepare(const Shape& input, const int32_t* axisData, const Shape& axisShape, bool keepDims,
                 Shape* output);

bool argMinMaxPrepare(const Shape& input, int32_t axis, Shape* output);

bool splitPrepare(const Shape& input, int32_t axis, int32_t numOutputs, std::vector<Shape>* output);

bool groupedConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
                        int32_t padding_left, int32_t padding_right, int32_t padding_top,
                        int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
                        int32_t numGroups, Shape* output);
339 
340 // Transposes the first two dimensions.
341 template <typename T>
transposeFirstTwoDimensions(const T * buffer,const Shape & shape,T * transposedBuffer)342 inline bool transposeFirstTwoDimensions(const T* buffer, const Shape& shape, T* transposedBuffer) {
343     const int numDims = getNumberOfDimensions(shape);
344     NN_RET_CHECK(numDims >= 2);
345     const int firstDim = getSizeOfDimension(shape, 0);
346     const int secondDim = getSizeOfDimension(shape, 1);
347     int blockSize = 1;
348     for (int i = 2; i < numDims; ++i) {
349         blockSize *= getSizeOfDimension(shape, i);
350     }
351 
352     for (int i = 0; i < firstDim; ++i) {
353         for (int j = 0; j < secondDim; ++j) {
354             for (int k = 0; k < blockSize; ++k) {
355                 transposedBuffer[(j * firstDim + i) * blockSize + k] =
356                         buffer[(i * secondDim + j) * blockSize + k];
357             }
358         }
359     }
360     return true;
361 }
362 
transposeFirstTwoDimensions(const Shape & shape,Shape * transposedShape)363 inline bool transposeFirstTwoDimensions(const Shape& shape, Shape* transposedShape) {
364     NN_RET_CHECK(getNumberOfDimensions(shape) >= 2);
365     *transposedShape = shape;
366     transposedShape->dimensions[0] = shape.dimensions[1];
367     transposedShape->dimensions[1] = shape.dimensions[0];
368     return true;
369 }
370 
371 // Given two 3-dimensional tensors, merge them into one 3-dimensional tensor
372 // at the third dimension. The merged tensor's third dimension size will be
373 // sum of that of the two inputs.
374 template <typename T>
mergeThirdDimension(const T * bufferA,const std::vector<uint32_t> & dimsA,const T * bufferB,const std::vector<uint32_t> & dimsB,T * merged)375 inline bool mergeThirdDimension(const T* bufferA, const std::vector<uint32_t>& dimsA,
376                                 const T* bufferB, const std::vector<uint32_t>& dimsB, T* merged) {
377     NN_RET_CHECK_EQ(dimsA.size(), 3u);
378     NN_RET_CHECK_EQ(dimsB.size(), 3u);
379 
380     NN_RET_CHECK_EQ(dimsA[0], dimsB[0]);
381     NN_RET_CHECK_EQ(dimsA[1], dimsB[1]);
382 
383     for (unsigned int i = 0; i < dimsA[0]; ++i) {
384         for (unsigned int j = 0; j < dimsA[1]; ++j) {
385             for (unsigned int k = 0; k < dimsA[2]; ++k) {
386                 merged[(i * dimsA[1] + j) * (dimsA[2] + dimsB[2]) + k] =
387                         bufferA[(i * dimsA[1] + j) * dimsA[2] + k];
388             }
389             for (unsigned int k = 0; k < dimsB[2]; ++k) {
390                 merged[(i * dimsA[1] + j) * (dimsA[2] + dimsB[2]) + dimsA[2] + k] =
391                         bufferB[(i * dimsB[1] + j) * dimsB[2] + k];
392             }
393         }
394     }
395     return true;
396 }
397 
// Saturating cast from int32_t to a narrower integer type T: values outside
// T's representable range clamp to T's minimum/maximum.
template <typename T>
inline T saturateCast(int32_t val);

template <>
inline uint8_t saturateCast<uint8_t>(int32_t val) {
    // Clamp to [0, 255] and cast directly to uint8_t. (Previously this cast
    // through static_cast<int8_t>, relying on implementation-defined narrowing
    // of 128..255 before the implicit conversion back to uint8_t.)
    return static_cast<uint8_t>(std::max(0, std::min(255, val)));
}

template <>
inline int8_t saturateCast<int8_t>(int32_t val) {
    // Clamp to the int8_t range [-128, 127].
    return static_cast<int8_t>(std::max(-128, std::min(127, val)));
}
410 
411 }  // namespace nn
412 }  // namespace android
413 
414 #endif  // ANDROID_FRAMEWORKS_ML_NN_COMMON_OPERATIONS_UTILS_H
415