/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "OperationsUtils"

#include "OperationsUtils.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include <sstream>
#include <vector>

#include "Operations.h"
#include "Utils.h"

namespace android {
namespace nn {

namespace {

using namespace hal;

bool validateOperandTypes(const std::vector<OperandType>& expectedTypes, const char* tag,
                          uint32_t operandCount,
                          std::function<OperandType(uint32_t)> getOperandType) {
    NN_RET_CHECK_EQ(operandCount, expectedTypes.size());
    for (uint32_t i = 0; i < operandCount; ++i) {
        OperandType type = getOperandType(i);
        NN_RET_CHECK(type == expectedTypes[i])
                << "Invalid " << tag << " tensor type " << toString(type) << " for " << tag << " "
                << i << ", expected " << toString(expectedTypes[i]);
    }
    return true;
}

void CalculateActivationRangeImpl(int32_t activation, const Shape& outputShape, int32_t qmin,
                                  int32_t qmax, int32_t* act_min, int32_t* act_max) {
    const auto scale = outputShape.scale;
    const auto zero_point = outputShape.offset;

    auto quantize = [scale, zero_point](float f) {
        return zero_point + static_cast<int32_t>(std::round(f / scale));
    };

    if (activation == kActivationRelu) {
        *act_min = std::max(qmin, quantize(0.0));
        *act_max = qmax;
    } else if (activation == kActivationRelu6) {
        *act_min = std::max(qmin, quantize(0.0));
        *act_max = std::min(qmax, quantize(6.0));
    } else if (activation == kActivationRelu1) {
        *act_min = std::max(qmin, quantize(-1.0));
        *act_max = std::min(qmax, quantize(1.0));
    } else if (activation == kActivationNone) {
        *act_min = qmin;
        *act_max = qmax;
    } else {
        LOG(ERROR) << "Unsupported fused activation function.";
    }
}

}  // namespace

bool validateInputTypes(const IOperationValidationContext* context,
                        const std::vector<OperandType>& expectedTypes) {
    return validateOperandTypes(expectedTypes, "input", context->getNumInputs(),
                                [context](uint32_t index) { return context->getInputType(index); });
}

bool validateOutputTypes(const IOperationValidationContext* context,
                         const std::vector<OperandType>& expectedTypes) {
    return validateOperandTypes(
            expectedTypes, "output", context->getNumOutputs(),
            [context](uint32_t index) { return context->getOutputType(index); });
}

bool validateHalVersion(const IOperationValidationContext* context,
                        HalVersion minSupportedHalVersion) {
    if (context->getHalVersion() < minSupportedHalVersion) {
        std::ostringstream message;
        message << "Operation " << context->getOperationName() << " with inputs {";
        for (uint32_t i = 0, n = context->getNumInputs(); i < n; ++i) {
            if (i != 0) {
                message << ", ";
            }
            message << toString(context->getInputType(i));
        }
        message << "} and outputs {";
        for (uint32_t i = 0, n = context->getNumOutputs(); i < n; ++i) {
            if (i != 0) {
                message << ", ";
            }
            message << toString(context->getOutputType(i));
        }
        message << "} is only supported since " << toString(minSupportedHalVersion)
                << " (validating using " << toString(context->getHalVersion()) << ")";
        NN_RET_CHECK_FAIL() << message.str();
    }
    return true;
}

bool SameShape(const Shape& in1, const Shape& in2) {
    if (in1.type != in2.type || in1.dimensions.size() != in2.dimensions.size()) {
        return false;
    }
    for (size_t i = 0; i < in1.dimensions.size(); i++) {
        if (in1.dimensions[i] != in2.dimensions[i]) {
            return false;
        }
    }
    return true;
}

bool SetShape(const Shape& in, Shape* out) {
    if (in.type != out->type) {
        return false;
    }
    out->dimensions = in.dimensions;
    return true;
}

uint32_t getNumberOfElements(const Shape& shape) {
    uint32_t count = 1;
    for (size_t i = 0; i < shape.dimensions.size(); i++) {
        count *= shape.dimensions[i];
    }
    return count;
}

uint32_t getNumberOfElements(const Shape& shape, size_t firstAxisInclusive,
                             size_t lastAxisExclusive) {
    nnAssert(0 <= firstAxisInclusive);
    nnAssert(firstAxisInclusive <= lastAxisExclusive);
    nnAssert(lastAxisExclusive <= shape.dimensions.size());
    uint32_t count = 1;
    for (size_t i = firstAxisInclusive; i < lastAxisExclusive; i++) {
        count *= shape.dimensions[i];
    }
    return count;
}

uint32_t getNumberOfDimensions(const Shape& shape) {
    return shape.dimensions.size();
}

uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx) {
    nnAssert(0 <= dimensionIdx && dimensionIdx < shape.dimensions.size());
    return shape.dimensions[dimensionIdx];
}

uint32_t hasKnownRank(const Shape& shape) {
    return !shape.dimensions.empty();
}

bool handleNegativeAxis(int32_t numberOfDimensions, int32_t* axis) {
    NN_CHECK(-numberOfDimensions <= *axis && *axis < numberOfDimensions);
    if (*axis < 0) {
        *axis += numberOfDimensions;
    }
    return true;
}
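
// For example, with numberOfDimensions = 4 an axis of -1 is remapped to 3 and
// an axis of -4 to 0; values outside [-4, 4) fail the check above.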

bool QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, int32_t* shift) {
    if (double_multiplier == 0.) {
        *quantized_multiplier = 0;
        *shift = 0;
        return true;
    }
    const double q = std::frexp(double_multiplier, shift);
    auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
    NN_RET_CHECK(q_fixed <= (1ll << 31));
    if (q_fixed == (1ll << 31)) {
        q_fixed /= 2;
        ++*shift;
    }
    NN_RET_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
    // A shift amount smaller than -31 would cause all bits to be shifted out
    // and thus all results would be zero. We implement that instead with
    // q_fixed==0, so as to avoid hitting issues with right-shift
    // operations with shift amounts greater than 31. Note that this happens
    // roughly when abs(double_multiplier) < 2^-31 and the present handling means
    // that we're effectively flushing tiny double_multiplier's to zero.
    // We could conceivably handle values in the range (roughly) [32, 63]
    // as 'denormals', i.e. (shift==0, q_fixed < 2^30). From that point of view
    // the present handling is just doing 'flush denormals to zero'. We could
    // reconsider and actually generate nonzero denormals if a need arises.
    if (*shift < -31) {
        *shift = 0;
        q_fixed = 0;
    }
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}
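
// For example, double_multiplier = 0.5 gives q = 0.5 and *shift = 0 from
// std::frexp, so q_fixed = round(0.5 * 2^31) = 2^30 and *quantized_multiplier
// becomes 1073741824, i.e. 0.5 is represented as 1073741824 * 2^(0 - 31).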

bool QuantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t* quantized_multiplier,
                                         int32_t* left_shift) {
    NN_RET_CHECK(double_multiplier > 0.);
    NN_RET_CHECK(double_multiplier < 1.);
    NN_RET_CHECK(QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift));
    NN_RET_CHECK(*left_shift <= 0);
    return true;
}

bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t* quantized_multiplier,
                                      int32_t* right_shift) {
    NN_OPS_CHECK(double_multiplier >= 0.);
    NN_OPS_CHECK(double_multiplier < 1.);
    if (double_multiplier == 0.) {
        *quantized_multiplier = 0;
        *right_shift = 0;
        return true;
    }
    NN_OPS_CHECK(double_multiplier > 0.);
    const double q = std::frexp(double_multiplier, right_shift);
    *right_shift *= -1;
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    NN_OPS_CHECK(q_fixed <= (1LL << 31));
    if (q_fixed == (1LL << 31)) {
        q_fixed /= 2;
        --*right_shift;
    }
    NN_OPS_CHECK(*right_shift >= 0);
    NN_OPS_CHECK(q_fixed <= std::numeric_limits<int32_t>::max());
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}

bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t* quantized_multiplier,
                                      int* left_shift) {
    NN_OPS_CHECK(double_multiplier > 1.);
    const double q = std::frexp(double_multiplier, left_shift);
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    NN_OPS_CHECK(q_fixed <= (1LL << 31));
    if (q_fixed == (1LL << 31)) {
        q_fixed /= 2;
        ++*left_shift;
    }
    NN_OPS_CHECK(*left_shift >= 0);
    NN_OPS_CHECK(q_fixed <= std::numeric_limits<int32_t>::max());
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}

bool GetQuantizedConvolutionMultipler(const Shape& inputShape, const Shape& filterShape,
                                      const Shape& biasShape, const Shape& outputShape,
                                      double* multiplier) {
    // Upcast bias and input_product to double
    const double input_product_scale = inputShape.scale * filterShape.scale;
    const double bias_scale = biasShape.scale;

    // The following conditions must be guaranteed by the training pipeline.
    NN_OPS_CHECK(std::abs(input_product_scale - bias_scale) <=
                 1e-6 * std::min(input_product_scale, bias_scale));
    NN_OPS_CHECK(input_product_scale >= 0);
    *multiplier = input_product_scale / outputShape.scale;
    return true;
}
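
// For example, with inputShape.scale = 0.5, filterShape.scale = 0.25 (so
// biasShape.scale must be approximately 0.125) and outputShape.scale = 1.0,
// the resulting real multiplier is 0.125.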

void CalculateActivationRangeUint8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                   int32_t* act_max) {
    const int32_t qmin = std::numeric_limits<uint8_t>::min();
    const int32_t qmax = std::numeric_limits<uint8_t>::max();

    CalculateActivationRangeImpl(activation, outputShape, qmin, qmax, act_min, act_max);
}

void CalculateActivationRangeInt8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                  int32_t* act_max) {
    const int32_t qmin = std::numeric_limits<int8_t>::min();
    const int32_t qmax = std::numeric_limits<int8_t>::max();

    CalculateActivationRangeImpl(activation, outputShape, qmin, qmax, act_min, act_max);
}
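
// For example, a TENSOR_QUANT8_ASYMM output with scale = 0.5 and offset = 0
// combined with kActivationRelu6 yields *act_min = 0 and
// *act_max = min(255, round(6 / 0.5)) = 12.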

void CalculateActivationRangeFloat(int32_t activation, float* activation_min,
                                   float* activation_max) {
    if (activation == kActivationRelu) {
        *activation_min = 0.f;
        *activation_max = std::numeric_limits<float>::max();
    } else if (activation == kActivationRelu6) {
        *activation_min = 0.f;
        *activation_max = 6.f;
    } else if (activation == kActivationRelu1) {
        *activation_min = -1.f;
        *activation_max = 1.f;
    } else if (activation == kActivationNone) {
        *activation_min = std::numeric_limits<float>::lowest();
        *activation_max = std::numeric_limits<float>::max();
    } else {
        LOG(ERROR) << "Unsupported fused activation function.";
    }
}

int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) {
    const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
                                      (1LL << (31 - input_integer_bits)) /
                                      (1LL << input_left_shift);
    // Tighten bound using floor. Suppose that we could use the exact value.
    // After scaling the difference, the result would be at the maximum. Thus we
    // must ensure that our value has lower magnitude.
    return static_cast<int32_t>(std::floor(max_input_rescaled));
}
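
// For example, input_integer_bits = 4 and input_left_shift = 5 give
// max_input_rescaled = 15 * 2^27 / 2^5 = 62914560, which is the returned
// radius.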

void calculateExplicitPaddingImpl(int32_t in_size, int32_t stride, int32_t dilation_factor,
                                  int32_t filter_size, int32_t padding_implicit,
                                  bool isTransposeConv, int32_t* padding_head,
                                  int32_t* padding_tail) {
    *padding_head = 0;
    *padding_tail = 0;

    int32_t effective_filter_size = (filter_size - 1) * dilation_factor + 1;

    if (padding_implicit == kPaddingSame) {
        int32_t out_size = (in_size + stride - 1) / stride;
        int32_t tmp = (out_size - 1) * stride + effective_filter_size;
        if (tmp > in_size) {
            *padding_head = (tmp - in_size) / 2;
            *padding_tail = (tmp - in_size) - *padding_head;
        }
        // For transpose conv, make padding tail fit tightly to the end of the last stride.
        if (isTransposeConv) {
            *padding_tail = (tmp - in_size) - *padding_head;
        }
    }
}
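
// For example, kPaddingSame with in_size = 5, stride = 2, dilation_factor = 1
// and filter_size = 3 gives out_size = 3 and tmp = 7, so padding_head and
// padding_tail both become 1; kPaddingValid leaves both at 0.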

bool calculateBroadcastedShape(const Shape& in1, const Shape& in2, Shape* out) {
    NN_RET_CHECK(in1.type == in2.type);
    uint32_t numberOfDims1 = getNumberOfDimensions(in1);
    uint32_t numberOfDims2 = getNumberOfDimensions(in2);
    uint32_t maxDims = std::max(numberOfDims1, numberOfDims2);
    out->dimensions = std::vector<uint32_t>(maxDims);
    for (uint32_t i = 1; i <= maxDims; i++) {
        uint32_t dim1 = 1;
        if (i <= numberOfDims1) {
            dim1 = getSizeOfDimension(in1, numberOfDims1 - i);
        }
        uint32_t dim2 = 1;
        if (i <= numberOfDims2) {
            dim2 = getSizeOfDimension(in2, numberOfDims2 - i);
        }
        if (dim1 != dim2 && dim1 != 1 && dim2 != 1) {
            LOG(ERROR) << "Dimensions mismatch for broadcast:\n"
                       << "First tensor: dimension " << numberOfDims1 - i << " of size " << dim1
                       << "\nSecond tensor: dimension " << numberOfDims2 - i << " of size " << dim2;
            return false;
        }
        out->dimensions[maxDims - i] = (dim1 == 1) ? dim2 : dim1;
    }
    return true;
}
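
// For example, broadcasting dimensions {2, 3, 4} against {3, 1} (aligned from
// the trailing axis) produces an output shape of {2, 3, 4}.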

template <>
uint8_t requantize<uint8_t>(uint8_t value, const Shape& oldShape, const Shape& newShape) {
    double doubleValue = (value - oldShape.offset) * oldShape.scale;
    double doubleRet = doubleValue / newShape.scale + newShape.offset;
    if (doubleRet < 0) return 0;
    if (doubleRet > 255) return 255;
    return static_cast<uint8_t>(std::round(doubleRet));
}

template <>
int8_t requantize<int8_t>(int8_t value, const Shape& oldShape, const Shape& newShape) {
    double doubleValue = (value - oldShape.offset) * oldShape.scale;
    double doubleRet = doubleValue / newShape.scale + newShape.offset;
    if (doubleRet < -128) return -128;
    if (doubleRet > 127) return 127;
    return static_cast<int8_t>(std::round(doubleRet));
}
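
// For example, requantizing the uint8 value 200 from (scale = 0.5, offset = 0)
// to (scale = 2.0, offset = 10) recovers the real value 100.0 and returns the
// quantized value 60.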

bool reshapePrepare(const Shape& input, const int32_t* targetDims, const int32_t targetDimsSize,
                    Shape* output) {
    // Reshape allows one of the targetDims components to have the
    // special -1 value, meaning it will be calculated automatically based on the
    // input. Here we calculate what that dimension should be so that the number
    // of output elements is the same as the number of input elements.
    int32_t numInputElements = (int32_t)getNumberOfElements(input);

    std::vector<uint32_t> outDims(targetDimsSize);
    int32_t numOutputElements = 1;
    int32_t stretchDim = -1;
    for (int32_t i = 0; i < targetDimsSize; ++i) {
        int32_t value = targetDims[i];
        if (value == -1) {
            NN_OPS_CHECK(stretchDim == -1);
            stretchDim = i;
        } else {
            numOutputElements *= value;
            outDims[i] = (uint32_t)value;
        }
    }
    if (stretchDim != -1) {
        int32_t stretchValue = numInputElements / numOutputElements;
        outDims[stretchDim] = (uint32_t)stretchValue;
        numOutputElements *= stretchValue;
    }

    NN_OPS_CHECK(numInputElements == numOutputElements);

    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
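
// For example, an input with 24 elements and targetDims = {2, -1, 4} resolves
// the stretch dimension to 24 / 8 = 3, producing an output shape of {2, 3, 4}.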

bool depthToSpacePrepare(const Shape& input, int32_t blockSize, Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(blockSize > 0);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(channels % (blockSize * blockSize) == 0);
    output->type = input.type;
    output->dimensions = {batches, height * blockSize, width * blockSize,
                          channels / (blockSize * blockSize)};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool spaceToDepthPrepare(const Shape& input, int32_t blockSize, Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(blockSize > 0);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(height % blockSize == 0);
    NN_OPS_CHECK(width % blockSize == 0);

    output->type = input.type;
    output->dimensions = {batches, height / blockSize, width / blockSize,
                          channels * (blockSize * blockSize)};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
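
// For example, with blockSize = 2, depthToSpace maps an input of shape
// {1, 2, 2, 8} to {1, 4, 4, 2}, and spaceToDepth maps {1, 4, 4, 2} back to
// {1, 2, 2, 8}.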

bool embeddingLookupPrepare(const Shape& valueShape, const Shape& lookupShape, Shape* outputShape) {
    NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 2);
    NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1);

    const uint32_t columns = getSizeOfDimension(valueShape, 1);
    const uint32_t lookups = getSizeOfDimension(lookupShape, 0);

    outputShape->type = valueShape.type;
    outputShape->dimensions = {lookups, columns};
    for (uint32_t i = 2; i < getNumberOfDimensions(valueShape); i++) {
        outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i));
    }
    outputShape->offset = valueShape.offset;
    outputShape->scale = valueShape.scale;

    return true;
}

bool hashtableLookupPrepare(const Shape& lookupShape, const Shape& keyShape,
                            const Shape& valueShape, Shape* outputShape, Shape* hitShape) {
    NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(keyShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 1);

    const uint32_t lookups = getSizeOfDimension(lookupShape, 0);
    outputShape->type = valueShape.type;
    outputShape->dimensions = {lookups};
    for (uint32_t i = 1; i < getNumberOfDimensions(valueShape); i++) {
        outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i));
    }
    outputShape->offset = valueShape.offset;
    outputShape->scale = valueShape.scale;

    hitShape->type = OperandType::TENSOR_QUANT8_ASYMM;
    hitShape->dimensions = {lookups};
    hitShape->offset = 0;
    hitShape->scale = 1.f;

    return true;
}

bool padPrepare(const Shape& input, const int32_t* paddingsData, const Shape& paddingsShape,
                Shape* output) {
    uint32_t numInputDims = getNumberOfDimensions(input);

    // paddings need to be provided as a 2-D int32 tensor.
    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == numInputDims);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);

    std::vector<uint32_t> outDims(numInputDims);
    for (uint32_t i = 0; i < numInputDims; ++i) {
        int32_t beforePadding = *paddingsData++;
        int32_t afterPadding = *paddingsData++;
        // Pad value has to be greater than or equal to 0.
        NN_OPS_CHECK(beforePadding >= 0 && afterPadding >= 0);
        outDims[i] = beforePadding + getSizeOfDimension(input, i) + afterPadding;
    }
    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
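
// For example, padding an input of shape {2, 3} with paddings [[1, 1], [2, 2]]
// yields an output shape of {4, 7}.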

bool batchToSpacePrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, Shape* output) {
    // Only 4D NHWC tensors are supported.
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    // blockSize needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
    // Only applies to spatial dimensions.
    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(batches % (blockSizeData[0] * blockSizeData[1]) == 0);
    output->type = input.type;
    output->dimensions = {batches / (blockSizeData[0] * blockSizeData[1]),
                          height * blockSizeData[0], width * blockSizeData[1], channels};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool spaceToBatchPrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, const int32_t* paddingsData,
                         const Shape& paddingsShape, Shape* output) {
    // Only 4D NHWC tensors are supported.
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    // blockSize needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
    // Only applies to spatial dimensions.
    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);

    // paddings need to be provided as a 2-D int32 tensor.
    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    uint32_t paddedHeight = paddingsData[0] + height + paddingsData[1];
    uint32_t paddedWidth = paddingsData[2] + width + paddingsData[3];

    NN_OPS_CHECK(paddedHeight % blockSizeData[0] == 0);
    NN_OPS_CHECK(paddedWidth % blockSizeData[1] == 0);

    output->type = input.type;
    output->dimensions = {batches * (blockSizeData[0] * blockSizeData[1]),
                          paddedHeight / blockSizeData[0], paddedWidth / blockSizeData[1],
                          channels};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
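
// For example, spaceToBatch on an input of shape {1, 4, 4, 1} with
// blockSize {2, 2} and zero paddings produces {4, 2, 2, 1}, and batchToSpace
// with the same block size maps {4, 2, 2, 1} back to {1, 4, 4, 1}.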

bool meanPrepare(const Shape& input, const int32_t* axisData, const Shape& axisShape, bool keepDims,
                 Shape* output) {
    // axis needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(axisShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(axisShape) == 1);

    int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(input));
    int32_t axisSize = static_cast<int32_t>(getSizeOfDimension(axisShape, 0));

    // Determines size of output tensor.
    if (keepDims) {
        std::vector<uint32_t> outDims(numInputDims);
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            bool isAxis = false;
            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
                    isAxis = true;
                    break;
                }
            }
            if (isAxis) {
                outDims[idx] = 1;
            } else {
                outDims[idx] = getSizeOfDimension(input, idx);
            }
        }
        output->dimensions = outDims;
    } else {
        // Calculates size of reducing axis.
        int32_t numReduceAxis = axisSize;
        for (int32_t i = 0; i < axisSize; ++i) {
            int32_t current = axisData[i];
            if (current < 0) {
                current += numInputDims;
            }
            NN_OPS_CHECK(current >= 0 && current < numInputDims);
            for (int32_t j = 0; j < i; ++j) {
                int32_t previous = axisData[j];
                if (previous < 0) {
                    previous += numInputDims;
                }
                if (current == previous) {
                    --numReduceAxis;
                    break;
                }
            }
        }
        // Determines output dimensions.
        std::vector<uint32_t> outDims(numInputDims - numReduceAxis);
        int32_t numSkipAxis = 0;
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            bool isAxis = false;
            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
                    ++numSkipAxis;
                    isAxis = true;
                    break;
                }
            }
            if (!isAxis) {
                outDims[idx - numSkipAxis] = getSizeOfDimension(input, idx);
            }
        }
        // Handle the case when all dimensions are removed
        if (outDims.empty()) {
            outDims.push_back(1);
        }
        output->dimensions = outDims;
    }

    output->type = input.type;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
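
// For example, reducing an input of shape {2, 3, 4} over axis {1} gives an
// output shape of {2, 4} with keepDims = false and {2, 1, 4} with
// keepDims = true.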

bool argMinMaxPrepare(const Shape& input, int32_t axis, Shape* output) {
    NN_CHECK(handleNegativeAxis(input, &axis));

    output->type = OperandType::TENSOR_INT32;

    // Copy the input dimensions, omitting the axis dimension.
    output->dimensions.clear();
    if (getNumberOfDimensions(input) > 1) {
        output->dimensions.reserve(getNumberOfDimensions(input) - 1);
        output->dimensions.insert(output->dimensions.end(), input.dimensions.begin(),
                                  input.dimensions.begin() + axis);
        output->dimensions.insert(output->dimensions.end(), input.dimensions.begin() + axis + 1,
                                  input.dimensions.end());
    } else {
        output->dimensions.push_back(1);
    }

    return true;
}

bool splitPrepare(const Shape& input, int32_t axis, int32_t numOutputs,
                  std::vector<Shape>* output) {
    NN_CHECK(handleNegativeAxis(input, &axis));

    const int32_t sizeOfAxisToSplit = input.dimensions[axis];
    NN_OPS_CHECK(sizeOfAxisToSplit % numOutputs == 0);
    const int32_t sliceSize = sizeOfAxisToSplit / numOutputs;

    for (int i = 0; i < numOutputs; ++i) {
        output->at(i).type = input.type;
        output->at(i).dimensions = input.dimensions;
        output->at(i).dimensions[axis] = sliceSize;
        output->at(i).offset = input.offset;
        output->at(i).scale = input.scale;
    }
    return true;
}

bool groupedConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
                        int32_t padding_left, int32_t padding_right, int32_t padding_top,
                        int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
                        int32_t numGroups, Shape* output) {
    if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
        NN_OPS_CHECK(input.type == OperandType::TENSOR_QUANT8_ASYMM ||
                     input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED);
    } else {
        NN_OPS_CHECK(input.type == filter.type);
    }
    if (input.type == OperandType::TENSOR_QUANT8_ASYMM ||
        input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
        NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32);
    } else {
        NN_OPS_CHECK(input.type == bias.type);
    }
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(filter) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(bias) == 1);

    NN_OPS_CHECK(getSizeOfDimension(filter, 0) == getSizeOfDimension(bias, 0));

    NN_OPS_CHECK(getSizeOfDimension(filter, 3) * numGroups == getSizeOfDimension(input, 3));
    NN_OPS_CHECK(getSizeOfDimension(filter, 0) % numGroups == 0);

    uint32_t channels_out = getSizeOfDimension(filter, 0);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t filterWidth = getSizeOfDimension(filter, 2);
    uint32_t filterHeight = getSizeOfDimension(filter, 1);
    uint32_t batches = getSizeOfDimension(input, 0);

    NN_RET_CHECK_GT(static_cast<int32_t>(filterWidth), padding_left);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterWidth), padding_right);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterHeight), padding_top);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterHeight), padding_bottom);

    uint32_t outWidth =
            computeOutSize(width, filterWidth, stride_width, padding_left, padding_right);
    uint32_t outHeight =
            computeOutSize(height, filterHeight, stride_height, padding_top, padding_bottom);

    output->type = input.type;
    output->dimensions = {batches, outHeight, outWidth, channels_out};
    return true;
}

}  // namespace nn
}  // namespace android