1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Operations"
18
19 #include <algorithm>
20 #include <cfloat>
21 #include <cmath>
22 #include <numeric>
23 #include <utility>
24 #include <vector>
25
26 #include "CpuOperationUtils.h"
27 #include "HalInterfaces.h"
28 #include "OperationResolver.h"
29 #include "OperationsUtils.h"
30 #include "Tracing.h"
31
32 namespace android {
33 namespace nn {
34 namespace bbox_ops {
35
36 namespace {
37
38 using namespace hal;
39
// Axis-aligned box in corner encoding: top-left (x1, y1), bottom-right (x2, y2).
struct BoxEncodingCorner {
    float x1, y1, x2, y2;
};
// Axis-aligned box in center encoding: width, height, and center point (x, y).
struct BoxEncodingCenter {
    float w, h, x, y;
};
toBoxEncodingCorner(const BoxEncodingCenter & ctr)46 BoxEncodingCorner toBoxEncodingCorner(const BoxEncodingCenter& ctr) {
47 return {.x1 = ctr.x - ctr.w / 2,
48 .y1 = ctr.y - ctr.h / 2,
49 .x2 = ctr.x + ctr.w / 2,
50 .y2 = ctr.y + ctr.h / 2};
51 }
toBoxEncodingCenter(const BoxEncodingCorner & cnr)52 BoxEncodingCenter toBoxEncodingCenter(const BoxEncodingCorner& cnr) {
53 return {.w = cnr.x2 - cnr.x1,
54 .h = cnr.y2 - cnr.y1,
55 .x = (cnr.x1 + cnr.x2) / 2,
56 .y = (cnr.y1 + cnr.y2) / 2};
57 }
58
bboxTransformFloat32(const float * roiData,const Shape & roiShape,const float * bboxDeltasData,const Shape & bboxDeltasShape,const int32_t * batchesData,const Shape & batchesShape,const float * imageInfoData,const Shape & imageInfoDataShape,float * outputData,const Shape & outputShape)59 inline bool bboxTransformFloat32(const float* roiData, const Shape& roiShape,
60 const float* bboxDeltasData, const Shape& bboxDeltasShape,
61 const int32_t* batchesData, const Shape& batchesShape,
62 const float* imageInfoData, const Shape& imageInfoDataShape,
63 float* outputData, const Shape& outputShape) {
64 const uint32_t roiLength = 4;
65 const uint32_t imageLength = 2;
66
67 uint32_t numClasses = getSizeOfDimension(bboxDeltasShape, 1) / roiLength;
68 uint32_t numBatches = getSizeOfDimension(imageInfoDataShape, 0);
69
70 const float* roiDataEnd = roiData + getNumberOfElements(roiShape);
71 const float* deltas = bboxDeltasData;
72 float* outPtr = outputData;
73 uint32_t roiIndex = 0;
74 for (const float* roiBase = roiData; roiBase < roiDataEnd; roiBase += roiLength, roiIndex++) {
75 uint32_t batchIndex = batchesData[roiIndex];
76 // Check for malformed data
77 // 1. Invalid batch id
78 // 2. Invalid region: x2 < x1 || y2 < y1
79 NN_RET_CHECK_GE(batchIndex, 0);
80 NN_RET_CHECK_LT(batchIndex, numBatches);
81 NN_RET_CHECK_LE(roiBase[0], roiBase[2]);
82 NN_RET_CHECK_LE(roiBase[1], roiBase[3]);
83
84 const float* imageInfoBase = imageInfoData + batchIndex * imageLength;
85 float imageHeight = imageInfoBase[0];
86 float imageWidth = imageInfoBase[1];
87 auto roiBefore = toBoxEncodingCenter(
88 {.x1 = roiBase[0], .y1 = roiBase[1], .x2 = roiBase[2], .y2 = roiBase[3]});
89 for (uint32_t i = 0; i < numClasses; i++) {
90 auto roiAfter = toBoxEncodingCorner({.w = std::exp(deltas[2]) * roiBefore.w,
91 .h = std::exp(deltas[3]) * roiBefore.h,
92 .x = roiBefore.x + deltas[0] * roiBefore.w,
93 .y = roiBefore.y + deltas[1] * roiBefore.h});
94 BoxEncodingCorner cliped = {.x1 = std::min(std::max(roiAfter.x1, 0.0f), imageWidth),
95 .y1 = std::min(std::max(roiAfter.y1, 0.0f), imageHeight),
96 .x2 = std::min(std::max(roiAfter.x2, 0.0f), imageWidth),
97 .y2 = std::min(std::max(roiAfter.y2, 0.0f), imageHeight)};
98 outPtr[0] = cliped.x1;
99 outPtr[1] = cliped.y1;
100 outPtr[2] = cliped.x2;
101 outPtr[3] = cliped.y2;
102 deltas += roiLength;
103 outPtr += roiLength;
104 }
105 }
106 return true;
107 }
108
bboxTransformFloat16(const _Float16 * roiData,const Shape & roiShape,const _Float16 * bboxDeltasData,const Shape & bboxDeltasShape,const int32_t * batchesData,const Shape & batchesShape,const _Float16 * imageInfoData,const Shape & imageInfoDataShape,_Float16 * outputData,const Shape & outputShape)109 inline bool bboxTransformFloat16(const _Float16* roiData, const Shape& roiShape,
110 const _Float16* bboxDeltasData, const Shape& bboxDeltasShape,
111 const int32_t* batchesData, const Shape& batchesShape,
112 const _Float16* imageInfoData, const Shape& imageInfoDataShape,
113 _Float16* outputData, const Shape& outputShape) {
114 std::vector<float> roi_float32(getNumberOfElements(roiShape));
115 convertFloat16ToFloat32(roiData, &roi_float32);
116 std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape));
117 convertFloat16ToFloat32(bboxDeltasData, &delta_float32);
118 std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoDataShape));
119 convertFloat16ToFloat32(imageInfoData, &imageInfo_float32);
120 std::vector<float> output_float32(getNumberOfElements(outputShape));
121 NN_RET_CHECK(bboxTransformFloat32(roi_float32.data(), roiShape, delta_float32.data(),
122 bboxDeltasShape, batchesData, batchesShape,
123 imageInfo_float32.data(), imageInfoDataShape,
124 output_float32.data(), outputShape));
125 convertFloat32ToFloat16(output_float32, outputData);
126 return true;
127 }
128
bboxTransformQuant(const uint16_t * roiData,const Shape & roiShape,const uint8_t * bboxDeltasData,const Shape & bboxDeltasShape,const int32_t * batchesData,const Shape & batchesShape,const uint16_t * imageInfoData,const Shape & imageInfoDataShape,uint16_t * outputData,const Shape & outputShape)129 inline bool bboxTransformQuant(const uint16_t* roiData, const Shape& roiShape,
130 const uint8_t* bboxDeltasData, const Shape& bboxDeltasShape,
131 const int32_t* batchesData, const Shape& batchesShape,
132 const uint16_t* imageInfoData, const Shape& imageInfoDataShape,
133 uint16_t* outputData, const Shape& outputShape) {
134 std::vector<float> roi_float32(getNumberOfElements(roiShape));
135 convertQuantToFloat32(roiData, roiShape.scale, roiShape.offset, &roi_float32);
136 std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape));
137 convertQuantToFloat32(bboxDeltasData, bboxDeltasShape.scale, bboxDeltasShape.offset,
138 &delta_float32);
139 std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoDataShape));
140 convertQuantToFloat32(imageInfoData, imageInfoDataShape.scale, imageInfoDataShape.offset,
141 &imageInfo_float32);
142 std::vector<float> output_float32(getNumberOfElements(outputShape));
143 NN_RET_CHECK(bboxTransformFloat32(roi_float32.data(), roiShape, delta_float32.data(),
144 bboxDeltasShape, batchesData, batchesShape,
145 imageInfo_float32.data(), imageInfoDataShape,
146 output_float32.data(), outputShape));
147 convertFloat32ToQuant(output_float32, outputShape.scale, outputShape.offset, outputData);
148 return true;
149 }
150
bboxTransformQuant(const uint16_t * roiData,const Shape & roiShape,const int8_t * bboxDeltasData,const Shape & bboxDeltasShape,const int32_t * batchesData,const Shape & batchesShape,const uint16_t * imageInfoData,const Shape & imageInfoDataShape,uint16_t * outputData,const Shape & outputShape)151 inline bool bboxTransformQuant(const uint16_t* roiData, const Shape& roiShape,
152 const int8_t* bboxDeltasData, const Shape& bboxDeltasShape,
153 const int32_t* batchesData, const Shape& batchesShape,
154 const uint16_t* imageInfoData, const Shape& imageInfoDataShape,
155 uint16_t* outputData, const Shape& outputShape) {
156 std::vector<float> roi_float32(getNumberOfElements(roiShape));
157 convertQuantToFloat32(roiData, roiShape.scale, roiShape.offset, &roi_float32);
158 std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape));
159 convertQuantToFloat32<int8_t>(bboxDeltasData, bboxDeltasShape.scale, bboxDeltasShape.offset,
160 &delta_float32);
161 std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoDataShape));
162 convertQuantToFloat32(imageInfoData, imageInfoDataShape.scale, imageInfoDataShape.offset,
163 &imageInfo_float32);
164 std::vector<float> output_float32(getNumberOfElements(outputShape));
165 NN_RET_CHECK(bboxTransformFloat32(roi_float32.data(), roiShape, delta_float32.data(),
166 bboxDeltasShape, batchesData, batchesShape,
167 imageInfo_float32.data(), imageInfoDataShape,
168 output_float32.data(), outputShape));
169 convertFloat32ToQuant(output_float32, outputShape.scale, outputShape.offset, outputData);
170 return true;
171 }
172
// Taking two bounding boxes in corner encoding (x1, y1, x2, y2), returns
// their intersection-over-union. Two zero-area boxes yield 0 instead of the
// NaN a 0/0 division would produce.
float getIoUAxisAligned(const float* roi1, const float* roi2) {
    const float area1 = (roi1[2] - roi1[0]) * (roi1[3] - roi1[1]);
    const float area2 = (roi2[2] - roi2[0]) * (roi2[3] - roi2[1]);
    const float x1 = std::max(roi1[0], roi2[0]);
    const float x2 = std::min(roi1[2], roi2[2]);
    const float y1 = std::max(roi1[1], roi2[1]);
    const float y2 = std::min(roi1[3], roi2[3]);
    const float w = std::max(x2 - x1, 0.0f);
    const float h = std::max(y2 - y1, 0.0f);
    const float areaIntersect = w * h;
    const float areaUnion = area1 + area2 - areaIntersect;
    // Guard against division by zero for degenerate (zero-area) inputs.
    if (areaUnion <= 0.0f) return 0.0f;
    return areaIntersect / areaUnion;
}
187
188 } // namespace
189
190 namespace axis_aligned_bbox_transform {
191
constexpr char kOperationName[] = "AXIS_ALIGNED_BBOX_TRANSFORM";

// Input operand indices (shapes validated in prepare() below).
constexpr uint32_t kNumInputs = 4;
constexpr uint32_t kRoiTensor = 0;        // [numRois, 4] input boxes
constexpr uint32_t kDeltaTensor = 1;      // [numRois, numClasses * 4] box deltas
constexpr uint32_t kBatchesTensor = 2;    // [numRois] batch index per ROI
constexpr uint32_t kImageInfoTensor = 3;  // [numBatches, 2] image (height, width)

// Output operand indices.
constexpr uint32_t kNumOutputs = 1;
constexpr uint32_t kOutputTensor = 0;     // [numRois, numClasses * 4] boxes
202
validate(const IOperationValidationContext * context)203 bool validate(const IOperationValidationContext* context) {
204 NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
205 NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
206 std::vector<OperandType> inExpectedTypes;
207 auto inputType = context->getInputType(kRoiTensor);
208 auto deltaInputType = context->getInputType(kDeltaTensor);
209 if (inputType == OperandType::TENSOR_FLOAT32 || inputType == OperandType::TENSOR_FLOAT16) {
210 inExpectedTypes = {inputType, inputType, OperandType::TENSOR_INT32, inputType};
211 } else if (inputType == OperandType::TENSOR_QUANT16_ASYMM) {
212 if (deltaInputType == OperandType::TENSOR_QUANT8_ASYMM ||
213 deltaInputType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
214 inExpectedTypes = {OperandType::TENSOR_QUANT16_ASYMM, deltaInputType,
215 OperandType::TENSOR_INT32, OperandType::TENSOR_QUANT16_ASYMM};
216 } else {
217 LOG(ERROR) << "Unsupported input tensor type for operation " << kOperationName;
218 return false;
219 }
220 } else {
221 LOG(ERROR) << "Unsupported input tensor type for operation " << kOperationName;
222 return false;
223 }
224 NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
225 NN_RET_CHECK(validateOutputTypes(context, {inputType}));
226 return validateHalVersion(context, HalVersion::V1_2);
227 }
228
prepare(IOperationExecutionContext * context)229 bool prepare(IOperationExecutionContext* context) {
230 Shape roiShape = context->getInputShape(kRoiTensor);
231 Shape bboxDeltasShape = context->getInputShape(kDeltaTensor);
232 Shape batchesShape = context->getInputShape(kBatchesTensor);
233 Shape imageInfoShape = context->getInputShape(kImageInfoTensor);
234 Shape outputShape = context->getOutputShape(kOutputTensor);
235
236 NN_RET_CHECK_EQ(getNumberOfDimensions(roiShape), 2);
237 NN_RET_CHECK_EQ(getNumberOfDimensions(bboxDeltasShape), 2);
238 NN_RET_CHECK_EQ(getNumberOfDimensions(batchesShape), 1);
239 NN_RET_CHECK_EQ(getNumberOfDimensions(imageInfoShape), 2);
240
241 // Only numRois can be zero.
242 const uint32_t kRoiDim = 4;
243 uint32_t numRois = getSizeOfDimension(roiShape, 0);
244 uint32_t numClasses = getSizeOfDimension(bboxDeltasShape, 1) / kRoiDim;
245 uint32_t numBatches = getSizeOfDimension(imageInfoShape, 0);
246 NN_RET_CHECK_GT(numClasses, 0);
247 NN_RET_CHECK_GT(numBatches, 0);
248 NN_RET_CHECK_EQ(getSizeOfDimension(roiShape, 1), kRoiDim);
249 NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, 0), numRois);
250 NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, 1), kRoiDim * numClasses);
251 NN_RET_CHECK_EQ(getSizeOfDimension(batchesShape, 0), numRois);
252 NN_RET_CHECK_EQ(getSizeOfDimension(imageInfoShape, 1), 2);
253
254 if (roiShape.type == OperandType::TENSOR_QUANT16_ASYMM) {
255 NN_RET_CHECK_EQ(roiShape.scale, 0.125f);
256 NN_RET_CHECK_EQ(roiShape.offset, 0);
257 NN_RET_CHECK_EQ(imageInfoShape.scale, 0.125f);
258 NN_RET_CHECK_EQ(imageInfoShape.offset, 0);
259 }
260
261 outputShape.type = roiShape.type;
262 outputShape.dimensions = {numRois, numClasses * kRoiDim};
263 outputShape.scale = 0.f;
264 outputShape.offset = 0;
265 if (roiShape.type == OperandType::TENSOR_QUANT16_ASYMM) {
266 outputShape.scale = 0.125f;
267 }
268 NN_RET_CHECK(context->setOutputShape(kOutputTensor, outputShape));
269 return true;
270 }
271
// Dispatches AXIS_ALIGNED_BBOX_TRANSFORM to the implementation matching the
// ROI tensor type (float16, float32, or quantized).
bool execute(IOperationExecutionContext* context) {
    NNTRACE_TRANS("axisAlignedBBoxTransform");
    // Bypass execution in the case of zero-sized input.
    if (getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0) return true;
    switch (context->getInputType(kRoiTensor)) {
        case OperandType::TENSOR_FLOAT16: {
            return bboxTransformFloat16(context->getInputBuffer<_Float16>(kRoiTensor),
                                        context->getInputShape(kRoiTensor),
                                        context->getInputBuffer<_Float16>(kDeltaTensor),
                                        context->getInputShape(kDeltaTensor),
                                        context->getInputBuffer<int32_t>(kBatchesTensor),
                                        context->getInputShape(kBatchesTensor),
                                        context->getInputBuffer<_Float16>(kImageInfoTensor),
                                        context->getInputShape(kImageInfoTensor),
                                        context->getOutputBuffer<_Float16>(kOutputTensor),
                                        context->getOutputShape(kOutputTensor));
        }
        case OperandType::TENSOR_FLOAT32: {
            return bboxTransformFloat32(context->getInputBuffer<float>(kRoiTensor),
                                        context->getInputShape(kRoiTensor),
                                        context->getInputBuffer<float>(kDeltaTensor),
                                        context->getInputShape(kDeltaTensor),
                                        context->getInputBuffer<int32_t>(kBatchesTensor),
                                        context->getInputShape(kBatchesTensor),
                                        context->getInputBuffer<float>(kImageInfoTensor),
                                        context->getInputShape(kImageInfoTensor),
                                        context->getOutputBuffer<float>(kOutputTensor),
                                        context->getOutputShape(kOutputTensor));
        }
        case OperandType::TENSOR_QUANT16_ASYMM: {
            // The delta tensor may be unsigned or signed 8-bit quantized;
            // select the matching bboxTransformQuant overload.
            if (context->getInputType(kDeltaTensor) == OperandType::TENSOR_QUANT8_ASYMM) {
                return bboxTransformQuant(context->getInputBuffer<uint16_t>(kRoiTensor),
                                          context->getInputShape(kRoiTensor),
                                          context->getInputBuffer<uint8_t>(kDeltaTensor),
                                          context->getInputShape(kDeltaTensor),
                                          context->getInputBuffer<int32_t>(kBatchesTensor),
                                          context->getInputShape(kBatchesTensor),
                                          context->getInputBuffer<uint16_t>(kImageInfoTensor),
                                          context->getInputShape(kImageInfoTensor),
                                          context->getOutputBuffer<uint16_t>(kOutputTensor),
                                          context->getOutputShape(kOutputTensor));
            } else {
                return bboxTransformQuant(context->getInputBuffer<uint16_t>(kRoiTensor),
                                          context->getInputShape(kRoiTensor),
                                          context->getInputBuffer<int8_t>(kDeltaTensor),
                                          context->getInputShape(kDeltaTensor),
                                          context->getInputBuffer<int32_t>(kBatchesTensor),
                                          context->getInputShape(kBatchesTensor),
                                          context->getInputBuffer<uint16_t>(kImageInfoTensor),
                                          context->getInputShape(kImageInfoTensor),
                                          context->getOutputBuffer<uint16_t>(kOutputTensor),
                                          context->getOutputShape(kOutputTensor));
            }
        }
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
}
330
331 } // namespace axis_aligned_bbox_transform
332
333 namespace box_with_nms_limit {
334
constexpr char kOperationName[] = "BOX_WITH_NMS_LIMIT";

// Input operand indices. Operands 3-8 are scalar parameters of the (soft)
// NMS pass; see boxWithNmsLimitFloat32Compute below for how they are used.
constexpr uint32_t kNumInputs = 9;
constexpr uint32_t kScoreTensor = 0;
constexpr uint32_t kRoiTensor = 1;
constexpr uint32_t kBatchesTensor = 2;
constexpr uint32_t kScoreThresholdScalar = 3;
constexpr uint32_t kMaxNumDetectionScalar = 4;
constexpr uint32_t kNmsKernelScalar = 5;
constexpr uint32_t kIoUThresholdScalar = 6;
constexpr uint32_t kSigmaScalar = 7;
constexpr uint32_t kNmsScoreThresholdScalar = 8;

// Output operand indices: kept scores, boxes, class ids, and batch ids.
constexpr uint32_t kNumOutputs = 4;
constexpr uint32_t kOutputScoreTensor = 0;
constexpr uint32_t kOutputRoiTensor = 1;
constexpr uint32_t kOutputClassTensor = 2;
constexpr uint32_t kOutputBatchesTensor = 3;
353
354 namespace {
355
// TODO(xusongw): Reduce code duplication with hard/soft nms path.

// Inplace hard NMS within range [select, select + selectLength).
// scoresData: flattened scores, indexed by the values stored in select.
// getRoiBase: maps a selection index to the 4-float box it refers to.
// A negative maxNumDetections means "no per-class limit".
// Returns a pointer one past the last kept element; the kept indices in
// [select, return value) end up ordered by decreasing score.
uint32_t* hardNmsSingleClass(const float* scoresData, float iouThreshold, int32_t maxNumDetections,
                             std::function<const float*(uint32_t)> getRoiBase, uint32_t* select,
                             uint32_t selectLength) {
    uint32_t *selectStart = select, *selectEnd = select + selectLength, numDetections = 0;
    if (maxNumDetections < 0) {
        maxNumDetections = selectLength;
    }
    while (selectStart < selectEnd && numDetections < maxNumDetections) {
        // find max score and swap to the front
        auto& maxScore = *std::max_element(selectStart, selectEnd,
                                           [&scoresData](const uint32_t& lhs, const uint32_t& rhs) {
                                               return scoresData[lhs] < scoresData[rhs];
                                           });
        std::swap(maxScore, *selectStart);

        // Calculate IoU of the rest, swap to the end (discard) if needed.
        // The i-- after the swap re-examines the element just moved here.
        for (uint32_t* i = selectStart + 1; i < selectEnd; i++) {
            float iou = getIoUAxisAligned(getRoiBase(*i), getRoiBase(*selectStart));
            if (iou >= iouThreshold) {
                std::swap(*i--, *(--selectEnd));
            }
        }
        selectStart++;
        numDetections++;
    }
    return selectStart;
}
386
// Runs hard NMS independently for every class (class 0 is treated as
// background and skipped), then keeps the overall top maxNumDetections boxes
// across all classes. Selected flat indices (roi * numClasses + class) are
// appended to *select. A negative maxNumDetections means "no overall limit".
void hardNmsMultiClass(const float* scoresData, uint32_t numClasses, uint32_t numRois,
                       float scoreThreshold, float iouThreshold, int32_t maxNumDetections,
                       int32_t maxNumDetectionsPerClass,
                       std::function<const float*(uint32_t)> getRoiBase,
                       std::vector<uint32_t>* select) {
    // Exclude class 0 (background)
    for (uint32_t c = 1; c < numClasses; c++) {
        uint32_t size = select->size();
        // Gather candidates of this class that pass the score threshold.
        for (uint32_t b = 0; b < numRois; b++) {
            const uint32_t index = b * numClasses + c;
            const float score = scoresData[index];
            if (score > scoreThreshold) {
                select->push_back(index);
            }
        }
        // NMS this class's slice of *select in place, then drop the tail.
        uint32_t* selectStart = select->data() + size;
        uint32_t selectLength = select->size() - size;
        uint32_t* selectEnd = hardNmsSingleClass(scoresData, iouThreshold, maxNumDetectionsPerClass,
                                                 getRoiBase, selectStart, selectLength);
        select->resize(selectEnd - select->data());
    }

    // Take top maxNumDetections.
    std::sort(select->begin(), select->end(),
              [&scoresData](const uint32_t& lhs, const uint32_t& rhs) {
                  return scoresData[lhs] > scoresData[rhs];
              });
    if (maxNumDetections < 0 || select->size() <= maxNumDetections) {
        return;
    }
    select->resize(maxNumDetections);
}
419
// Inplace soft NMS within range [select, select + selectLength).
// A kernel maps an IoU value to a score multiplier.
using SoftNmsKernel = std::function<float(float)>;
// Like hardNmsSingleClass, but instead of discarding overlapping boxes
// outright, their scores are decayed by kernel(iou); a box is discarded only
// once its score drops below scoreThreshold. scoresData is modified in place.
// Returns a pointer one past the last kept element.
uint32_t* softNmsSingleClass(float* scoresData, float scoreThreshold, int32_t maxNumDetections,
                             std::function<const float*(uint32_t)> getRoiBase, SoftNmsKernel kernel,
                             uint32_t* select, uint32_t selectLength) {
    uint32_t *selectStart = select, *selectEnd = select + selectLength, numDetections = 0;
    if (maxNumDetections < 0) {
        maxNumDetections = selectLength;
    }
    while (selectStart < selectEnd && numDetections < maxNumDetections) {
        // find max score and swap to the front
        auto& maxScore = *std::max_element(selectStart, selectEnd,
                                           [&scoresData](const uint32_t& lhs, const uint32_t& rhs) {
                                               return scoresData[lhs] < scoresData[rhs];
                                           });
        std::swap(maxScore, *selectStart);

        // Calculate IoU of the rest, decay scores, swap to the end (discard)
        // if below threshold. The i-- re-examines the swapped-in element.
        for (uint32_t* i = selectStart + 1; i < selectEnd; i++) {
            float iou = getIoUAxisAligned(getRoiBase(*i), getRoiBase(*selectStart));
            scoresData[*i] *= kernel(iou);
            if (scoresData[*i] < scoreThreshold) {
                std::swap(*i--, *(--selectEnd));
            }
        }
        selectStart++;
        numDetections++;
    }
    return selectStart;
}
450
// Soft-NMS counterpart of hardNmsMultiClass: per-class soft NMS (class 0 is
// background and skipped), then keep the overall top maxNumDetections boxes
// across classes. scoresData is modified in place by the score decay.
void softNmsMultiClass(float* scoresData, uint32_t numClasses, uint32_t numRois,
                       float scoreThreshold, float nmsScoreThreshold, int32_t maxNumDetections,
                       int32_t maxNumDetectionsPerClass,
                       std::function<const float*(uint32_t)> getRoiBase, SoftNmsKernel kernel,
                       std::vector<uint32_t>* select) {
    // Exclude class 0 (background)
    for (uint32_t c = 1; c < numClasses; c++) {
        uint32_t size = select->size();
        // Gather candidates of this class that pass the score threshold.
        for (uint32_t b = 0; b < numRois; b++) {
            const uint32_t index = b * numClasses + c;
            const float score = scoresData[index];
            if (score > scoreThreshold) {
                select->push_back(index);
            }
        }
        // Soft-NMS this class's slice of *select in place, drop the tail.
        uint32_t* selectStart = select->data() + size;
        uint32_t selectLength = select->size() - size;
        uint32_t* selectEnd =
                softNmsSingleClass(scoresData, nmsScoreThreshold, maxNumDetectionsPerClass,
                                   getRoiBase, kernel, selectStart, selectLength);
        select->resize(selectEnd - select->data());
    }

    // Take top maxNumDetections.
    std::sort(select->begin(), select->end(),
              [&scoresData](const uint32_t& lhs, const uint32_t& rhs) {
                  return scoresData[lhs] > scoresData[rhs];
              });
    if (maxNumDetections < 0 || select->size() <= maxNumDetections) {
        return;
    }
    select->resize(maxNumDetections);
}
484
boxWithNmsLimitFloat32Compute(float * scoresData,const Shape & scoresShape,const float * roiData,const Shape & roiShape,const int32_t * batchesData,const Shape & batchesShape,float scoreThreshold,int32_t maxNumDetections,int32_t softNmsKernel,float iouThreshold,float sigma,float nmsScoreThreshold,std::vector<uint32_t> * batchSplitIn,std::vector<uint32_t> * batchSplitOut,std::vector<uint32_t> * selected)485 bool boxWithNmsLimitFloat32Compute(float* scoresData, const Shape& scoresShape,
486 const float* roiData, const Shape& roiShape,
487 const int32_t* batchesData, const Shape& batchesShape,
488 float scoreThreshold, int32_t maxNumDetections,
489 int32_t softNmsKernel, float iouThreshold, float sigma,
490 float nmsScoreThreshold, std::vector<uint32_t>* batchSplitIn,
491 std::vector<uint32_t>* batchSplitOut,
492 std::vector<uint32_t>* selected) {
493 SoftNmsKernel kernel = nullptr;
494 if (softNmsKernel == 0) {
495 kernel = [&iouThreshold](float iou) { return iou < iouThreshold ? 1.0f : 0.0f; };
496 } else if (softNmsKernel == 1) {
497 kernel = [&iouThreshold](float iou) { return iou < iouThreshold ? 1.0f : 1.0f - iou; };
498 } else if (softNmsKernel == 2) {
499 kernel = [&sigma](float iou) { return std::exp(-1.0f * iou * iou / sigma); };
500 } else {
501 NN_RET_CHECK_FAIL() << "Unsupported soft NMS kernel " << softNmsKernel;
502 }
503
504 const uint32_t kRoiDim = 4;
505 uint32_t numRois = getSizeOfDimension(scoresShape, 0);
506 uint32_t numClasses = getSizeOfDimension(scoresShape, 1);
507
508 // We assume boxes of the same batch are grouped together.
509 std::vector<uint32_t> batch;
510 for (uint32_t i = 0, ind = -1; i < numRois; i++) {
511 if (batchesData[i] == ind) {
512 (batchSplitIn->back())++;
513 } else {
514 ind = batchesData[i];
515 batchSplitIn->push_back(1);
516 }
517 }
518
519 float* scoresBase = scoresData;
520 const float* roiBase = roiData;
521 selected->clear();
522 for (uint32_t b = 0; b < batchSplitIn->size(); b++) {
523 for (uint32_t i = 0; i < batchSplitIn->at(b); i++) {
524 const float* roi = roiBase + i * kRoiDim;
525 // Check for malformed data: invalid region: x2 < x1 || y2 < y1
526 NN_RET_CHECK_LE(roi[0], roi[2]);
527 NN_RET_CHECK_LE(roi[1], roi[3]);
528 }
529 std::vector<uint32_t> result;
530 softNmsMultiClass(
531 scoresBase, numClasses, batchSplitIn->at(b), scoreThreshold, nmsScoreThreshold,
532 maxNumDetections, maxNumDetections,
533 [&roiBase](uint32_t ind) { return roiBase + ind * kRoiDim; }, kernel, &result);
534 // Sort again by class.
535 std::sort(result.begin(), result.end(),
536 [&scoresBase, numClasses](const uint32_t& lhs, const uint32_t& rhs) {
537 uint32_t lhsClass = lhs % numClasses, rhsClass = rhs % numClasses;
538 return lhsClass == rhsClass ? scoresBase[lhs] > scoresBase[rhs]
539 : lhsClass < rhsClass;
540 });
541 selected->insert(selected->end(), result.begin(), result.end());
542 batchSplitOut->push_back(result.size());
543 scoresBase += batchSplitIn->at(b) * numClasses;
544 roiBase += batchSplitIn->at(b) * numClasses * kRoiDim;
545 }
546 return true;
547 }
548
// Casts a float value to the output operand's element type. The generic
// version is a plain conversion; the specializations below quantize using the
// output shape's scale/offset and saturate to the target integer range.
template <typename T>
T castTo(float val, const Shape&) {
    return val;
}
template <>
uint8_t castTo(float val, const Shape& shape) {
    return saturateCast<uint8_t>(std::round(val / shape.scale + shape.offset));
}

template <>
int8_t castTo(float val, const Shape& shape) {
    return saturateCast<int8_t>(std::round(val / shape.scale + shape.offset));
}
562
// Writes the four BOX_WITH_NMS_LIMIT outputs (scores, rois, classes, batches)
// for the selected boxes, resizing each output tensor to the number of
// selections first.
// selected:      flat indices (roi * numClasses + class), grouped by batch.
// batchSplitIn:  number of input rois per batch (advances the input bases).
// batchSplitOut: number of selected rois per batch.
// scores:        float32 scores aligned with the score input tensor layout.
template <typename T_Score, typename T_Roi>
bool boxWithNmsLimitWriteOutput(const std::vector<uint32_t>& selected,
                                const std::vector<uint32_t>& batchSplitIn,
                                const std::vector<uint32_t>& batchSplitOut,
                                const std::vector<float>& scores,
                                IOperationExecutionContext* context) {
    const uint32_t kRoiDim = 4;
    Shape scoresShape = context->getInputShape(kScoreTensor);
    uint32_t numClasses = getSizeOfDimension(scoresShape, 1);

    // Set output dimensions.
    uint32_t numOutRois = selected.size();
    // NOTE(review): with zero selections the output shapes are left untouched
    // here — presumably the caller handles that case; confirm before relying
    // on it.
    if (numOutRois == 0) return true;
    Shape scoresOutShape = context->getOutputShape(kOutputScoreTensor);
    scoresOutShape.dimensions = {numOutRois};
    NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, scoresOutShape));

    Shape roiOutShape = context->getOutputShape(kOutputRoiTensor);
    roiOutShape.dimensions = {numOutRois, 4};
    NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, roiOutShape));

    Shape classesOutShape = context->getOutputShape(kOutputClassTensor);
    classesOutShape.dimensions = {numOutRois};
    NN_RET_CHECK(context->setOutputShape(kOutputClassTensor, classesOutShape));

    Shape batchesOutShape = context->getOutputShape(kOutputBatchesTensor);
    batchesOutShape.dimensions = {numOutRois};
    NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, batchesOutShape));

    // Write outputs.
    const float* scoresBase = scores.data();
    const T_Roi* roiBase = context->getInputBuffer<T_Roi>(kRoiTensor);
    const int32_t* batchesInPtr = context->getInputBuffer<int32_t>(kBatchesTensor);
    T_Score* scoresOutPtr = context->getOutputBuffer<T_Score>(kOutputScoreTensor);
    T_Roi* roiOutPtr = context->getOutputBuffer<T_Roi>(kOutputRoiTensor);
    int32_t* classesOutPtr = context->getOutputBuffer<int32_t>(kOutputClassTensor);
    int32_t* batchesOutPtr = context->getOutputBuffer<int32_t>(kOutputBatchesTensor);
    uint32_t i = 0;
    for (uint32_t b = 0; b < batchSplitOut.size(); b++) {
        for (uint32_t j = 0; j < batchSplitOut[b]; j++) {
            uint32_t index = selected[i++];
            // Scores are cast (and quantized, for integer types) on write.
            *scoresOutPtr++ = castTo<T_Score>(scoresBase[index], scoresOutShape);
            memcpy(roiOutPtr, roiBase + index * kRoiDim, kRoiDim * sizeof(T_Roi));
            roiOutPtr += kRoiDim;
            // Recover the class id from the flat index.
            *classesOutPtr++ = index % numClasses;
            *batchesOutPtr++ = *batchesInPtr;
        }
        // Advance the input bases to the next batch.
        scoresBase += batchSplitIn[b] * numClasses;
        roiBase += batchSplitIn[b] * numClasses * kRoiDim;
        batchesInPtr += batchSplitIn[b];
    }
    return true;
}
616
boxWithNmsLimitFloat32(const float * scoresData,const Shape & scoresShape,const float * roiData,const Shape & roiShape,const int32_t * batchesData,const Shape & batchesShape,float scoreThreshold,int32_t maxNumDetections,int32_t softNmsKernel,float iouThreshold,float sigma,float nmsScoreThreshold,float * scoresOutData,Shape scoresOutShape,float * roiOutData,Shape roiOutShape,int32_t * classesOutData,Shape classesOutShape,int32_t * batchesOutData,const Shape & batchSplitOutShape,IOperationExecutionContext * context)617 bool boxWithNmsLimitFloat32(const float* scoresData, const Shape& scoresShape, const float* roiData,
618 const Shape& roiShape, const int32_t* batchesData,
619 const Shape& batchesShape, float scoreThreshold,
620 int32_t maxNumDetections, int32_t softNmsKernel, float iouThreshold,
621 float sigma, float nmsScoreThreshold, float* scoresOutData,
622 Shape scoresOutShape, float* roiOutData, Shape roiOutShape,
623 int32_t* classesOutData, Shape classesOutShape, int32_t* batchesOutData,
624 const Shape& batchSplitOutShape, IOperationExecutionContext* context) {
625 NNTRACE_TRANS("boxWithNmsLimit");
626 std::vector<float> scores_float32(getNumberOfElements(scoresShape));
627 for (uint32_t i = 0; i < scores_float32.size(); i++) {
628 scores_float32[i] = scoresData[i];
629 }
630 std::vector<uint32_t> selected, batchSplitIn, batchSplitOut;
631 NN_RET_CHECK(boxWithNmsLimitFloat32Compute(
632 scores_float32.data(), scoresShape, roiData, roiShape, batchesData, batchesShape,
633 scoreThreshold, maxNumDetections, softNmsKernel, iouThreshold, sigma, nmsScoreThreshold,
634 &batchSplitIn, &batchSplitOut, &selected));
635 return boxWithNmsLimitWriteOutput<float, float>(selected, batchSplitIn, batchSplitOut,
636 scores_float32, context);
637 }
638
boxWithNmsLimitFloat16(const _Float16 * scoresData,const Shape & scoresShape,const _Float16 * roiData,const Shape & roiShape,const int32_t * batchesData,const Shape & batchesShape,_Float16 scoreThreshold,int32_t maxNumDetections,int32_t softNmsKernel,_Float16 iouThreshold,_Float16 sigma,_Float16 nmsScoreThreshold,_Float16 * scoresOutData,const Shape & scoresOutShape,_Float16 * roiOutData,const Shape & roiOutShape,int32_t * classesOutData,const Shape & classesOutShape,int32_t * batchesOutData,const Shape & batchSplitOutShape,IOperationExecutionContext * context)639 bool boxWithNmsLimitFloat16(const _Float16* scoresData, const Shape& scoresShape,
640 const _Float16* roiData, const Shape& roiShape,
641 const int32_t* batchesData, const Shape& batchesShape,
642 _Float16 scoreThreshold, int32_t maxNumDetections,
643 int32_t softNmsKernel, _Float16 iouThreshold, _Float16 sigma,
644 _Float16 nmsScoreThreshold, _Float16* scoresOutData,
645 const Shape& scoresOutShape, _Float16* roiOutData,
646 const Shape& roiOutShape, int32_t* classesOutData,
647 const Shape& classesOutShape, int32_t* batchesOutData,
648 const Shape& batchSplitOutShape, IOperationExecutionContext* context) {
649 std::vector<float> scores_float32(getNumberOfElements(scoresShape));
650 convertFloat16ToFloat32(scoresData, &scores_float32);
651 std::vector<float> roi_float32(getNumberOfElements(roiShape));
652 convertFloat16ToFloat32(roiData, &roi_float32);
653 std::vector<uint32_t> selected, batchSplitIn, batchSplitOut;
654 NN_RET_CHECK(boxWithNmsLimitFloat32Compute(
655 scores_float32.data(), scoresShape, roi_float32.data(), roiShape, batchesData,
656 batchesShape, scoreThreshold, maxNumDetections, softNmsKernel, iouThreshold, sigma,
657 nmsScoreThreshold, &batchSplitIn, &batchSplitOut, &selected));
658 return boxWithNmsLimitWriteOutput<_Float16, _Float16>(selected, batchSplitIn, batchSplitOut,
659 scores_float32, context);
660 }
661
boxWithNmsLimitQuant(const uint8_t * scoresData,const Shape & scoresShape,const uint16_t * roiData,const Shape & roiShape,const int32_t * batchesData,const Shape & batchesShape,float scoreThreshold,int32_t maxNumDetections,int32_t softNmsKernel,float iouThreshold,float sigma,float nmsScoreThreshold,uint8_t * scoresOutData,const Shape & scoresOutShape,uint16_t * roiOutData,const Shape & roiOutShape,int32_t * classesOutData,const Shape & classesOutShape,int32_t * batchesOutData,const Shape & batchSplitOutShape,IOperationExecutionContext * context)662 bool boxWithNmsLimitQuant(const uint8_t* scoresData, const Shape& scoresShape,
663 const uint16_t* roiData, const Shape& roiShape,
664 const int32_t* batchesData, const Shape& batchesShape,
665 float scoreThreshold, int32_t maxNumDetections, int32_t softNmsKernel,
666 float iouThreshold, float sigma, float nmsScoreThreshold,
667 uint8_t* scoresOutData, const Shape& scoresOutShape, uint16_t* roiOutData,
668 const Shape& roiOutShape, int32_t* classesOutData,
669 const Shape& classesOutShape, int32_t* batchesOutData,
670 const Shape& batchSplitOutShape, IOperationExecutionContext* context) {
671 std::vector<float> scores_float32(getNumberOfElements(scoresShape));
672 convertQuantToFloat32(scoresData, scoresShape.scale, scoresShape.offset, &scores_float32);
673 std::vector<float> roi_float32(getNumberOfElements(roiShape));
674 convertQuantToFloat32(roiData, roiShape.scale, roiShape.offset, &roi_float32);
675 std::vector<uint32_t> selected, batchSplitIn, batchSplitOut;
676 NN_RET_CHECK(boxWithNmsLimitFloat32Compute(
677 scores_float32.data(), scoresShape, roi_float32.data(), roiShape, batchesData,
678 batchesShape, scoreThreshold, maxNumDetections, softNmsKernel, iouThreshold, sigma,
679 nmsScoreThreshold, &batchSplitIn, &batchSplitOut, &selected));
680 return boxWithNmsLimitWriteOutput<uint8_t, uint16_t>(selected, batchSplitIn, batchSplitOut,
681 scores_float32, context);
682 }
683
boxWithNmsLimitQuant(const int8_t * scoresData,const Shape & scoresShape,const uint16_t * roiData,const Shape & roiShape,const int32_t * batchesData,const Shape & batchesShape,float scoreThreshold,int32_t maxNumDetections,int32_t softNmsKernel,float iouThreshold,float sigma,float nmsScoreThreshold,int8_t * scoresOutData,const Shape & scoresOutShape,uint16_t * roiOutData,const Shape & roiOutShape,int32_t * classesOutData,const Shape & classesOutShape,int32_t * batchesOutData,const Shape & batchSplitOutShape,IOperationExecutionContext * context)684 bool boxWithNmsLimitQuant(const int8_t* scoresData, const Shape& scoresShape,
685 const uint16_t* roiData, const Shape& roiShape,
686 const int32_t* batchesData, const Shape& batchesShape,
687 float scoreThreshold, int32_t maxNumDetections, int32_t softNmsKernel,
688 float iouThreshold, float sigma, float nmsScoreThreshold,
689 int8_t* scoresOutData, const Shape& scoresOutShape, uint16_t* roiOutData,
690 const Shape& roiOutShape, int32_t* classesOutData,
691 const Shape& classesOutShape, int32_t* batchesOutData,
692 const Shape& batchSplitOutShape, IOperationExecutionContext* context) {
693 std::vector<float> scores_float32(getNumberOfElements(scoresShape));
694 convertQuantToFloat32<int8_t>(scoresData, scoresShape.scale, scoresShape.offset,
695 &scores_float32);
696 std::vector<float> roi_float32(getNumberOfElements(roiShape));
697 convertQuantToFloat32(roiData, roiShape.scale, roiShape.offset, &roi_float32);
698 std::vector<uint32_t> selected, batchSplitIn, batchSplitOut;
699 NN_RET_CHECK(boxWithNmsLimitFloat32Compute(
700 scores_float32.data(), scoresShape, roi_float32.data(), roiShape, batchesData,
701 batchesShape, scoreThreshold, maxNumDetections, softNmsKernel, iouThreshold, sigma,
702 nmsScoreThreshold, &batchSplitIn, &batchSplitOut, &selected));
703 return boxWithNmsLimitWriteOutput<int8_t, uint16_t>(selected, batchSplitIn, batchSplitOut,
704 scores_float32, context);
705 }
706
707 } // namespace
708
validate(const IOperationValidationContext * context)709 bool validate(const IOperationValidationContext* context) {
710 NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
711 NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
712 std::vector<OperandType> inExpectedTypes;
713 std::vector<OperandType> outExpectedTypes;
714 auto inputType = context->getInputType(kScoreTensor);
715 if (inputType == OperandType::TENSOR_FLOAT16) {
716 inExpectedTypes = {
717 OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16, OperandType::TENSOR_INT32,
718 OperandType::FLOAT16, OperandType::INT32, OperandType::INT32,
719 OperandType::FLOAT16, OperandType::FLOAT16, OperandType::FLOAT16};
720 outExpectedTypes = {OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16,
721 OperandType::TENSOR_INT32, OperandType::TENSOR_INT32};
722 } else if (inputType == OperandType::TENSOR_FLOAT32) {
723 inExpectedTypes = {
724 OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32, OperandType::TENSOR_INT32,
725 OperandType::FLOAT32, OperandType::INT32, OperandType::INT32,
726 OperandType::FLOAT32, OperandType::FLOAT32, OperandType::FLOAT32};
727 outExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
728 OperandType::TENSOR_INT32, OperandType::TENSOR_INT32};
729 } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM ||
730 inputType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
731 inExpectedTypes = {inputType,
732 OperandType::TENSOR_QUANT16_ASYMM,
733 OperandType::TENSOR_INT32,
734 OperandType::FLOAT32,
735 OperandType::INT32,
736 OperandType::INT32,
737 OperandType::FLOAT32,
738 OperandType::FLOAT32,
739 OperandType::FLOAT32};
740 outExpectedTypes = {inputType, OperandType::TENSOR_QUANT16_ASYMM, OperandType::TENSOR_INT32,
741 OperandType::TENSOR_INT32};
742 } else {
743 NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
744 }
745 NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
746 NN_RET_CHECK(validateOutputTypes(context, outExpectedTypes));
747 if (inputType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
748 return validateHalVersion(context, HalVersion::V1_3);
749 } else {
750 return validateHalVersion(context, HalVersion::V1_2);
751 }
752 }
753
prepare(IOperationExecutionContext * context)754 bool prepare(IOperationExecutionContext* context) {
755 Shape scoreShape = context->getInputShape(kScoreTensor);
756 Shape roiShape = context->getInputShape(kRoiTensor);
757 Shape batchesShape = context->getInputShape(kBatchesTensor);
758 Shape outputScoreShape = context->getOutputShape(kOutputScoreTensor);
759 Shape outputRoiShape = context->getOutputShape(kOutputRoiTensor);
760 Shape outputClassShape = context->getOutputShape(kOutputClassTensor);
761 Shape outputBatchSplitShape = context->getOutputShape(kOutputBatchesTensor);
762
763 NN_RET_CHECK(getNumberOfDimensions(scoreShape) == 2);
764 NN_RET_CHECK(getNumberOfDimensions(roiShape) == 2);
765 NN_RET_CHECK(getNumberOfDimensions(batchesShape) == 1);
766
767 // Only numRois can be zero.
768 const uint32_t kRoiDim = 4;
769 uint32_t numRois = getSizeOfDimension(scoreShape, 0);
770 uint32_t numClasses = getSizeOfDimension(scoreShape, 1);
771 NN_RET_CHECK(getSizeOfDimension(roiShape, 0) == numRois);
772 NN_RET_CHECK(getSizeOfDimension(roiShape, 1) == kRoiDim * numClasses);
773 NN_RET_CHECK(getSizeOfDimension(batchesShape, 0) == numRois);
774 NN_RET_CHECK_GT(numClasses, 1);
775
776 if (scoreShape.type == OperandType::TENSOR_QUANT8_ASYMM ||
777 scoreShape.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
778 NN_RET_CHECK_EQ(roiShape.scale, 0.125f);
779 NN_RET_CHECK_EQ(roiShape.offset, 0);
780 }
781
782 outputScoreShape.type = scoreShape.type;
783 outputScoreShape.dimensions = {0};
784 outputScoreShape.scale = scoreShape.scale;
785 outputScoreShape.offset = scoreShape.offset;
786 NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, outputScoreShape));
787
788 outputRoiShape.type = roiShape.type;
789 outputRoiShape.dimensions = {0, 4};
790 outputRoiShape.scale = 0.f;
791 outputRoiShape.offset = 0;
792 if (scoreShape.type == OperandType::TENSOR_QUANT8_ASYMM ||
793 scoreShape.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
794 outputRoiShape.scale = 0.125f;
795 }
796 NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, outputRoiShape));
797
798 outputClassShape.type = OperandType::TENSOR_INT32;
799 outputClassShape.dimensions = {0};
800 NN_RET_CHECK(context->setOutputShape(kOutputClassTensor, outputClassShape));
801
802 outputBatchSplitShape.type = batchesShape.type;
803 outputBatchSplitShape.dimensions = {0};
804 NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, outputBatchSplitShape));
805 return true;
806 }
807
// Dispatches BOX_WITH_NMS_LIMIT to the typed implementation matching the
// score tensor's operand type, forwarding all operand buffers, shapes, and
// scalar parameters straight from the execution context.
bool execute(IOperationExecutionContext* context) {
    NNTRACE_TRANS("boxWithNMSLimit");
    // Bypass execution in the case of zero numRois.
    if (getSizeOfDimension(context->getInputShape(kScoreTensor), 0) == 0) return true;
    switch (context->getInputType(kScoreTensor)) {
        // fp16 scores and rois; fp16 threshold scalars.
        case OperandType::TENSOR_FLOAT16: {
            return boxWithNmsLimitFloat16(
                    context->getInputBuffer<_Float16>(kScoreTensor),
                    context->getInputShape(kScoreTensor),
                    context->getInputBuffer<_Float16>(kRoiTensor),
                    context->getInputShape(kRoiTensor),
                    context->getInputBuffer<int32_t>(kBatchesTensor),
                    context->getInputShape(kBatchesTensor),
                    context->getInputValue<_Float16>(kScoreThresholdScalar),
                    context->getInputValue<int32_t>(kMaxNumDetectionScalar),
                    context->getInputValue<int32_t>(kNmsKernelScalar),
                    context->getInputValue<_Float16>(kIoUThresholdScalar),
                    context->getInputValue<_Float16>(kSigmaScalar),
                    context->getInputValue<_Float16>(kNmsScoreThresholdScalar),
                    context->getOutputBuffer<_Float16>(kOutputScoreTensor),
                    context->getOutputShape(kOutputScoreTensor),
                    context->getOutputBuffer<_Float16>(kOutputRoiTensor),
                    context->getOutputShape(kOutputRoiTensor),
                    context->getOutputBuffer<int32_t>(kOutputClassTensor),
                    context->getOutputShape(kOutputClassTensor),
                    context->getOutputBuffer<int32_t>(kOutputBatchesTensor),
                    context->getOutputShape(kOutputBatchesTensor), context);
        }
        // fp32 scores and rois; fp32 threshold scalars.
        case OperandType::TENSOR_FLOAT32: {
            return boxWithNmsLimitFloat32(context->getInputBuffer<float>(kScoreTensor),
                                          context->getInputShape(kScoreTensor),
                                          context->getInputBuffer<float>(kRoiTensor),
                                          context->getInputShape(kRoiTensor),
                                          context->getInputBuffer<int32_t>(kBatchesTensor),
                                          context->getInputShape(kBatchesTensor),
                                          context->getInputValue<float>(kScoreThresholdScalar),
                                          context->getInputValue<int32_t>(kMaxNumDetectionScalar),
                                          context->getInputValue<int32_t>(kNmsKernelScalar),
                                          context->getInputValue<float>(kIoUThresholdScalar),
                                          context->getInputValue<float>(kSigmaScalar),
                                          context->getInputValue<float>(kNmsScoreThresholdScalar),
                                          context->getOutputBuffer<float>(kOutputScoreTensor),
                                          context->getOutputShape(kOutputScoreTensor),
                                          context->getOutputBuffer<float>(kOutputRoiTensor),
                                          context->getOutputShape(kOutputRoiTensor),
                                          context->getOutputBuffer<int32_t>(kOutputClassTensor),
                                          context->getOutputShape(kOutputClassTensor),
                                          context->getOutputBuffer<int32_t>(kOutputBatchesTensor),
                                          context->getOutputShape(kOutputBatchesTensor), context);
        }
        // Unsigned quantized scores (uint8) with uint16 rois.
        case OperandType::TENSOR_QUANT8_ASYMM: {
            return boxWithNmsLimitQuant(context->getInputBuffer<uint8_t>(kScoreTensor),
                                        context->getInputShape(kScoreTensor),
                                        context->getInputBuffer<uint16_t>(kRoiTensor),
                                        context->getInputShape(kRoiTensor),
                                        context->getInputBuffer<int32_t>(kBatchesTensor),
                                        context->getInputShape(kBatchesTensor),
                                        context->getInputValue<float>(kScoreThresholdScalar),
                                        context->getInputValue<int32_t>(kMaxNumDetectionScalar),
                                        context->getInputValue<int32_t>(kNmsKernelScalar),
                                        context->getInputValue<float>(kIoUThresholdScalar),
                                        context->getInputValue<float>(kSigmaScalar),
                                        context->getInputValue<float>(kNmsScoreThresholdScalar),
                                        context->getOutputBuffer<uint8_t>(kOutputScoreTensor),
                                        context->getOutputShape(kOutputScoreTensor),
                                        context->getOutputBuffer<uint16_t>(kOutputRoiTensor),
                                        context->getOutputShape(kOutputRoiTensor),
                                        context->getOutputBuffer<int32_t>(kOutputClassTensor),
                                        context->getOutputShape(kOutputClassTensor),
                                        context->getOutputBuffer<int32_t>(kOutputBatchesTensor),
                                        context->getOutputShape(kOutputBatchesTensor), context);
        }
        // Signed quantized scores (int8) with uint16 rois.
        case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: {
            return boxWithNmsLimitQuant(context->getInputBuffer<int8_t>(kScoreTensor),
                                        context->getInputShape(kScoreTensor),
                                        context->getInputBuffer<uint16_t>(kRoiTensor),
                                        context->getInputShape(kRoiTensor),
                                        context->getInputBuffer<int32_t>(kBatchesTensor),
                                        context->getInputShape(kBatchesTensor),
                                        context->getInputValue<float>(kScoreThresholdScalar),
                                        context->getInputValue<int32_t>(kMaxNumDetectionScalar),
                                        context->getInputValue<int32_t>(kNmsKernelScalar),
                                        context->getInputValue<float>(kIoUThresholdScalar),
                                        context->getInputValue<float>(kSigmaScalar),
                                        context->getInputValue<float>(kNmsScoreThresholdScalar),
                                        context->getOutputBuffer<int8_t>(kOutputScoreTensor),
                                        context->getOutputShape(kOutputScoreTensor),
                                        context->getOutputBuffer<uint16_t>(kOutputRoiTensor),
                                        context->getOutputShape(kOutputRoiTensor),
                                        context->getOutputBuffer<int32_t>(kOutputClassTensor),
                                        context->getOutputShape(kOutputClassTensor),
                                        context->getOutputBuffer<int32_t>(kOutputBatchesTensor),
                                        context->getOutputShape(kOutputBatchesTensor), context);
        }
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
}
906
907 } // namespace box_with_nms_limit
908
909 namespace generate_proposals {
910
// Operation name used in validation error messages.
constexpr char kOperationName[] = "GENERATE_PROPOSALS";

// Input operand indices.
constexpr uint32_t kNumInputs = 11;
constexpr uint32_t kScoreTensor = 0;
constexpr uint32_t kDeltaTensor = 1;
constexpr uint32_t kAnchorTensor = 2;
constexpr uint32_t kImageInfoTensor = 3;
// NOTE(review): identifier misspells "Scalar"; kept as-is because it is
// referenced elsewhere in this file.
constexpr uint32_t kHeightStrideSalar = 4;
constexpr uint32_t kWidthStrideScalar = 5;
constexpr uint32_t kPreNmsMaxScalar = 6;
constexpr uint32_t kPostNmsMaxScalar = 7;
constexpr uint32_t kIoUThresholdScalar = 8;
constexpr uint32_t kMinSizeScalar = 9;
constexpr uint32_t kLayoutScalar = 10;

// Output operand indices.
constexpr uint32_t kNumOutputs = 3;
constexpr uint32_t kOutputScoreTensor = 0;
constexpr uint32_t kOutputRoiTensor = 1;
constexpr uint32_t kOutputBatchesTensor = 2;
930
931 namespace {
932
// In-place filter of roi indices: an index survives only if its box (corner
// encoding [x1, y1, x2, y2], read from roiBase) has width and height strictly
// greater than minSize and its center lies inside the image, whose
// (height, width) pair is read from imageInfoBase. Relative order of the
// surviving indices is preserved.
void filterBoxes(const float* roiBase, const float* imageInfoBase, float minSize,
                 std::vector<uint32_t>* select) {
    constexpr uint32_t kRoiDim = 4;
    const auto keep = [&](uint32_t index) {
        const float* box = roiBase + index * kRoiDim;
        const float width = box[2] - box[0];
        const float height = box[3] - box[1];
        const float centerX = box[0] + width / 2.0f;
        const float centerY = box[1] + height / 2.0f;
        return width > minSize && height > minSize && centerX < imageInfoBase[1] &&
               centerY < imageInfoBase[0];
    };
    select->erase(
            std::remove_if(select->begin(), select->end(),
                           [&keep](uint32_t index) { return !keep(index); }),
            select->end());
}
951
generateProposalsNhwcFloat32Compute(const float * scoresData,const Shape & scoresShape,const float * bboxDeltasData,const Shape & bboxDeltasShape,const float * anchorsData,const Shape & anchorsShape,const float * imageInfoData,const Shape & imageInfoShape,float heightStride,float widthStride,int32_t preNmsTopN,int32_t postNmsTopN,float iouThreshold,float minSize,std::vector<float> * scoresOutData,std::vector<float> * roiOutData,std::vector<int32_t> * batchesOutData)952 bool generateProposalsNhwcFloat32Compute(const float* scoresData, const Shape& scoresShape,
953 const float* bboxDeltasData, const Shape& bboxDeltasShape,
954 const float* anchorsData, const Shape& anchorsShape,
955 const float* imageInfoData, const Shape& imageInfoShape,
956 float heightStride, float widthStride, int32_t preNmsTopN,
957 int32_t postNmsTopN, float iouThreshold, float minSize,
958 std::vector<float>* scoresOutData,
959 std::vector<float>* roiOutData,
960 std::vector<int32_t>* batchesOutData) {
961 const uint32_t kRoiDim = 4;
962 uint32_t numBatches = getSizeOfDimension(scoresShape, 0);
963 uint32_t height = getSizeOfDimension(scoresShape, 1);
964 uint32_t width = getSizeOfDimension(scoresShape, 2);
965 uint32_t numAnchors = getSizeOfDimension(scoresShape, 3);
966 uint32_t imageInfoLength = getSizeOfDimension(imageInfoShape, 1);
967
968 uint32_t batchSize = height * width * numAnchors;
969 uint32_t roiBufferSize = batchSize * kRoiDim;
970 std::vector<float> roiBuffer(roiBufferSize);
971 std::vector<float> roiTransformedBuffer(roiBufferSize);
972 scoresOutData->clear();
973 roiOutData->clear();
974 batchesOutData->clear();
975
976 // Compute the roi region for each anchor.
977 float* roiBase = roiBuffer.data();
978 for (uint32_t h = 0; h < height; h++) {
979 float hShift = h * heightStride;
980 for (uint32_t w = 0; w < width; w++) {
981 const float* anchorsBase = anchorsData;
982 float wShift = w * widthStride;
983 for (uint32_t a = 0; a < numAnchors; a++, roiBase += kRoiDim, anchorsBase += kRoiDim) {
984 roiBase[0] = anchorsBase[0] + wShift;
985 roiBase[1] = anchorsBase[1] + hShift;
986 roiBase[2] = anchorsBase[2] + wShift;
987 roiBase[3] = anchorsBase[3] + hShift;
988 }
989 }
990 }
991
992 const float* scoresBase = scoresData;
993 const float* bboxDeltasBase = bboxDeltasData;
994 const float* imageInfoBase = imageInfoData;
995 // Need to fake some data to satisfy bboxTransform.
996 Shape tempRoiShape = anchorsShape;
997 tempRoiShape.dimensions = {batchSize, kRoiDim};
998 Shape tempBBoxDeltasShape = bboxDeltasShape;
999 tempBBoxDeltasShape.dimensions = {batchSize, kRoiDim};
1000 std::vector<int32_t> tempBatchSplitData(batchSize, 0);
1001 Shape tempbatchSplitShape = {.dimensions = {batchSize}};
1002 Shape tempImageInfoShape = imageInfoShape;
1003 tempImageInfoShape.dimensions = {1, imageInfoLength};
1004
1005 for (uint32_t b = 0; b < numBatches; b++) {
1006 // Apply bboxDeltas to anchor locations.
1007 float tempImageInfo[] = {imageInfoBase[0], imageInfoBase[1]};
1008 if (!bboxTransformFloat32(roiBuffer.data(), tempRoiShape, bboxDeltasBase,
1009 tempBBoxDeltasShape, tempBatchSplitData.data(),
1010 tempbatchSplitShape, tempImageInfo, tempImageInfoShape,
1011 roiTransformedBuffer.data(), tempRoiShape)) {
1012 LOG(ERROR) << "BBoxTransform step failed in GENERATE_PROPOSALS op.";
1013 return false;
1014 }
1015
1016 // Find the top preNmsTopN scores.
1017 std::vector<uint32_t> select(batchSize);
1018 std::iota(select.begin(), select.end(), 0);
1019 if (preNmsTopN > 0 && preNmsTopN < select.size()) {
1020 std::sort(select.begin(), select.end(),
1021 [&scoresBase](const uint32_t lhs, const uint32_t rhs) {
1022 return scoresBase[lhs] > scoresBase[rhs];
1023 });
1024 select.resize(preNmsTopN);
1025 }
1026
1027 // Filter boxes, disgard regions with height or width < minSize.
1028 filterBoxes(roiTransformedBuffer.data(), imageInfoBase, minSize, &select);
1029
1030 // Apply hard NMS.
1031 uint32_t* selectEnd = box_with_nms_limit::hardNmsSingleClass(
1032 scoresBase, iouThreshold, postNmsTopN,
1033 [&roiTransformedBuffer](uint32_t ind) {
1034 return roiTransformedBuffer.data() + ind * kRoiDim;
1035 },
1036 select.data(), select.size());
1037 uint32_t selectSize = selectEnd - select.data();
1038 select.resize(selectSize);
1039
1040 // Write output.
1041 for (auto i : select) {
1042 roiOutData->insert(roiOutData->end(), roiTransformedBuffer.begin() + i * kRoiDim,
1043 roiTransformedBuffer.begin() + (i + 1) * kRoiDim);
1044 scoresOutData->push_back(scoresBase[i]);
1045 batchesOutData->push_back(b);
1046 }
1047 scoresBase += batchSize;
1048 bboxDeltasBase += roiBufferSize;
1049 imageInfoBase += imageInfoLength;
1050 }
1051 return true;
1052 }
1053
generateProposalsFloat32Compute(const float * scoresData,const Shape & scoresShape,const float * bboxDeltasData,const Shape & bboxDeltasShape,const float * anchorsData,const Shape & anchorsShape,const float * imageInfoData,const Shape & imageInfoShape,float heightStride,float widthStride,int32_t preNmsTopN,int32_t postNmsTopN,float iouThreshold,float minSize,bool useNchw,std::vector<float> * scoresOutData,std::vector<float> * roiOutData,std::vector<int32_t> * batchesOutData)1054 bool generateProposalsFloat32Compute(const float* scoresData, const Shape& scoresShape,
1055 const float* bboxDeltasData, const Shape& bboxDeltasShape,
1056 const float* anchorsData, const Shape& anchorsShape,
1057 const float* imageInfoData, const Shape& imageInfoShape,
1058 float heightStride, float widthStride, int32_t preNmsTopN,
1059 int32_t postNmsTopN, float iouThreshold, float minSize,
1060 bool useNchw, std::vector<float>* scoresOutData,
1061 std::vector<float>* roiOutData,
1062 std::vector<int32_t>* batchesOutData) {
1063 InputWithLayout<float> score_nhwc(useNchw), delta_nhwc(useNchw);
1064 NN_RET_CHECK(score_nhwc.initialize(scoresData, scoresShape));
1065 NN_RET_CHECK(delta_nhwc.initialize(bboxDeltasData, bboxDeltasShape));
1066 return generateProposalsNhwcFloat32Compute(
1067 score_nhwc.getNhwcBuffer(), score_nhwc.getNhwcShape(), delta_nhwc.getNhwcBuffer(),
1068 delta_nhwc.getNhwcShape(), anchorsData, anchorsShape, imageInfoData, imageInfoShape,
1069 heightStride, widthStride, preNmsTopN, postNmsTopN, iouThreshold, minSize,
1070 scoresOutData, roiOutData, batchesOutData);
1071 }
1072
generateProposalsFloat32(const float * scoresData,const Shape & scoresShape,const float * bboxDeltasData,const Shape & bboxDeltasShape,const float * anchorsData,const Shape & anchorsShape,const float * imageInfoData,const Shape & imageInfoShape,float heightStride,float widthStride,int32_t preNmsTopN,int32_t postNmsTopN,float iouThreshold,float minSize,bool useNchw,IOperationExecutionContext * context)1073 bool generateProposalsFloat32(const float* scoresData, const Shape& scoresShape,
1074 const float* bboxDeltasData, const Shape& bboxDeltasShape,
1075 const float* anchorsData, const Shape& anchorsShape,
1076 const float* imageInfoData, const Shape& imageInfoShape,
1077 float heightStride, float widthStride, int32_t preNmsTopN,
1078 int32_t postNmsTopN, float iouThreshold, float minSize, bool useNchw,
1079 IOperationExecutionContext* context) {
1080 std::vector<float> scoresOut_float32, roiOut_float32;
1081 std::vector<int32_t> batchesOut;
1082 NN_RET_CHECK(generateProposalsFloat32Compute(
1083 scoresData, scoresShape, bboxDeltasData, bboxDeltasShape, anchorsData, anchorsShape,
1084 imageInfoData, imageInfoShape, heightStride, widthStride, preNmsTopN, postNmsTopN,
1085 iouThreshold, minSize, useNchw, &scoresOut_float32, &roiOut_float32, &batchesOut));
1086
1087 // Set output dimensions.
1088 uint32_t numOutRois = scoresOut_float32.size();
1089 if (numOutRois == 0) return true;
1090 Shape scoresOutShape = context->getOutputShape(kOutputScoreTensor);
1091 scoresOutShape.dimensions = {numOutRois};
1092 NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, scoresOutShape));
1093 Shape roiOutShape = context->getOutputShape(kOutputRoiTensor);
1094 roiOutShape.dimensions = {numOutRois, 4};
1095 NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, roiOutShape));
1096 Shape batchesOutShape = context->getOutputShape(kOutputBatchesTensor);
1097 batchesOutShape.dimensions = {numOutRois};
1098 NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, batchesOutShape));
1099
1100 // Write outputs.
1101 float* scoresOutData = context->getOutputBuffer<float>(kOutputScoreTensor);
1102 for (uint32_t i = 0; i < scoresOut_float32.size(); i++) {
1103 scoresOutData[i] = scoresOut_float32[i];
1104 }
1105 float* roiOutData = context->getOutputBuffer<float>(kOutputRoiTensor);
1106 for (uint32_t i = 0; i < roiOut_float32.size(); i++) {
1107 roiOutData[i] = roiOut_float32[i];
1108 }
1109 int32_t* batchesOutData = context->getOutputBuffer<int32_t>(kOutputBatchesTensor);
1110 for (uint32_t i = 0; i < batchesOut.size(); i++) {
1111 batchesOutData[i] = batchesOut[i];
1112 }
1113 return true;
1114 }
1115
generateProposalsFloat16(const _Float16 * scoresData,const Shape & scoresShape,const _Float16 * bboxDeltasData,const Shape & bboxDeltasShape,const _Float16 * anchorsData,const Shape & anchorsShape,const _Float16 * imageInfoData,const Shape & imageInfoShape,float heightStride,float widthStride,int32_t preNmsTopN,int32_t postNmsTopN,float iouThreshold,float minSize,bool useNchw,IOperationExecutionContext * context)1116 bool generateProposalsFloat16(const _Float16* scoresData, const Shape& scoresShape,
1117 const _Float16* bboxDeltasData, const Shape& bboxDeltasShape,
1118 const _Float16* anchorsData, const Shape& anchorsShape,
1119 const _Float16* imageInfoData, const Shape& imageInfoShape,
1120 float heightStride, float widthStride, int32_t preNmsTopN,
1121 int32_t postNmsTopN, float iouThreshold, float minSize, bool useNchw,
1122 IOperationExecutionContext* context) {
1123 std::vector<float> score_float32(getNumberOfElements(scoresShape));
1124 convertFloat16ToFloat32(scoresData, &score_float32);
1125 std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape));
1126 convertFloat16ToFloat32(bboxDeltasData, &delta_float32);
1127 std::vector<float> anchors_float32(getNumberOfElements(anchorsShape));
1128 convertFloat16ToFloat32(anchorsData, &anchors_float32);
1129 std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoShape));
1130 convertFloat16ToFloat32(imageInfoData, &imageInfo_float32);
1131 std::vector<float> scoresOut_float32, roiOut_float32;
1132 std::vector<int32_t> batchesOut;
1133 NN_RET_CHECK(generateProposalsFloat32Compute(
1134 score_float32.data(), scoresShape, delta_float32.data(), bboxDeltasShape,
1135 anchors_float32.data(), anchorsShape, imageInfo_float32.data(), imageInfoShape,
1136 heightStride, widthStride, preNmsTopN, postNmsTopN, iouThreshold, minSize, useNchw,
1137 &scoresOut_float32, &roiOut_float32, &batchesOut));
1138
1139 // Set output dimensions.
1140 uint32_t numOutRois = scoresOut_float32.size();
1141 if (numOutRois == 0) return true;
1142 Shape scoresOutShape = context->getOutputShape(kOutputScoreTensor);
1143 scoresOutShape.dimensions = {numOutRois};
1144 NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, scoresOutShape));
1145 Shape roiOutShape = context->getOutputShape(kOutputRoiTensor);
1146 roiOutShape.dimensions = {numOutRois, 4};
1147 NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, roiOutShape));
1148 Shape batchesOutShape = context->getOutputShape(kOutputBatchesTensor);
1149 batchesOutShape.dimensions = {numOutRois};
1150 NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, batchesOutShape));
1151
1152 // Write outputs.
1153 _Float16* scoresOutData = context->getOutputBuffer<_Float16>(kOutputScoreTensor);
1154 convertFloat32ToFloat16(scoresOut_float32, scoresOutData);
1155 _Float16* roiOutData = context->getOutputBuffer<_Float16>(kOutputRoiTensor);
1156 convertFloat32ToFloat16(roiOut_float32, roiOutData);
1157 int32_t* batchesOutData = context->getOutputBuffer<int32_t>(kOutputBatchesTensor);
1158 for (uint32_t i = 0; i < batchesOut.size(); i++) {
1159 batchesOutData[i] = batchesOut[i];
1160 }
1161 return true;
1162 }
1163
1164 template <typename T_8QInput>
generateProposalsQuant(const T_8QInput * scoresData,const Shape & scoresShape,const T_8QInput * bboxDeltasData,const Shape & bboxDeltasShape,const int16_t * anchorsData,const Shape & anchorsShape,const uint16_t * imageInfoData,const Shape & imageInfoShape,float heightStride,float widthStride,int32_t preNmsTopN,int32_t postNmsTopN,float iouThreshold,float minSize,bool useNchw,IOperationExecutionContext * context)1165 bool generateProposalsQuant(const T_8QInput* scoresData, const Shape& scoresShape,
1166 const T_8QInput* bboxDeltasData, const Shape& bboxDeltasShape,
1167 const int16_t* anchorsData, const Shape& anchorsShape,
1168 const uint16_t* imageInfoData, const Shape& imageInfoShape,
1169 float heightStride, float widthStride, int32_t preNmsTopN,
1170 int32_t postNmsTopN, float iouThreshold, float minSize, bool useNchw,
1171 IOperationExecutionContext* context) {
1172 std::vector<float> score_float32(getNumberOfElements(scoresShape));
1173 convertQuantToFloat32<T_8QInput>(scoresData, scoresShape.scale, scoresShape.offset,
1174 &score_float32);
1175 std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape));
1176 convertQuantToFloat32<T_8QInput>(bboxDeltasData, bboxDeltasShape.scale, bboxDeltasShape.offset,
1177 &delta_float32);
1178 std::vector<float> anchors_float32(getNumberOfElements(anchorsShape));
1179 convertQuantToFloat32(anchorsData, anchorsShape.scale, anchorsShape.offset, &anchors_float32);
1180 std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoShape));
1181 convertQuantToFloat32(imageInfoData, imageInfoShape.scale, imageInfoShape.offset,
1182 &imageInfo_float32);
1183 std::vector<float> scoresOut_float32, roiOut_float32;
1184 std::vector<int32_t> batchesOut;
1185 NN_RET_CHECK(generateProposalsFloat32Compute(
1186 score_float32.data(), scoresShape, delta_float32.data(), bboxDeltasShape,
1187 anchors_float32.data(), anchorsShape, imageInfo_float32.data(), imageInfoShape,
1188 heightStride, widthStride, preNmsTopN, postNmsTopN, iouThreshold, minSize, useNchw,
1189 &scoresOut_float32, &roiOut_float32, &batchesOut));
1190
1191 // Set output dimensions.
1192 uint32_t numOutRois = scoresOut_float32.size();
1193 if (numOutRois == 0) return true;
1194 Shape scoresOutShape = context->getOutputShape(kOutputScoreTensor);
1195 scoresOutShape.dimensions = {numOutRois};
1196 NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, scoresOutShape));
1197 Shape roiOutShape = context->getOutputShape(kOutputRoiTensor);
1198 roiOutShape.dimensions = {numOutRois, 4};
1199 NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, roiOutShape));
1200 Shape batchesOutShape = context->getOutputShape(kOutputBatchesTensor);
1201 batchesOutShape.dimensions = {numOutRois};
1202 NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, batchesOutShape));
1203
1204 // Write outputs.
1205 T_8QInput* scoresOutData = context->getOutputBuffer<T_8QInput>(kOutputScoreTensor);
1206 convertFloat32ToQuant<T_8QInput>(scoresOut_float32, scoresOutShape.scale, scoresOutShape.offset,
1207 scoresOutData);
1208 uint16_t* roiOutData = context->getOutputBuffer<uint16_t>(kOutputRoiTensor);
1209 convertFloat32ToQuant(roiOut_float32, roiOutShape.scale, roiOutShape.offset, roiOutData);
1210 int32_t* batchesOutData = context->getOutputBuffer<int32_t>(kOutputBatchesTensor);
1211 for (uint32_t i = 0; i < batchesOut.size(); i++) {
1212 batchesOutData[i] = batchesOut[i];
1213 }
1214 return true;
1215 }
1216
1217 } // namespace
1218
validate(const IOperationValidationContext * context)1219 bool validate(const IOperationValidationContext* context) {
1220 NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
1221 NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
1222 std::vector<OperandType> inExpectedTypes;
1223 std::vector<OperandType> outExpectedTypes;
1224 auto inputType = context->getInputType(kScoreTensor);
1225 if (inputType == OperandType::TENSOR_FLOAT16) {
1226 inExpectedTypes = {OperandType::TENSOR_FLOAT16,
1227 OperandType::TENSOR_FLOAT16,
1228 OperandType::TENSOR_FLOAT16,
1229 OperandType::TENSOR_FLOAT16,
1230 OperandType::FLOAT16,
1231 OperandType::FLOAT16,
1232 OperandType::INT32,
1233 OperandType::INT32,
1234 OperandType::FLOAT16,
1235 OperandType::FLOAT16,
1236 OperandType::BOOL};
1237 outExpectedTypes = {OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16,
1238 OperandType::TENSOR_INT32};
1239 } else if (inputType == OperandType::TENSOR_FLOAT32) {
1240 inExpectedTypes = {OperandType::TENSOR_FLOAT32,
1241 OperandType::TENSOR_FLOAT32,
1242 OperandType::TENSOR_FLOAT32,
1243 OperandType::TENSOR_FLOAT32,
1244 OperandType::FLOAT32,
1245 OperandType::FLOAT32,
1246 OperandType::INT32,
1247 OperandType::INT32,
1248 OperandType::FLOAT32,
1249 OperandType::FLOAT32,
1250 OperandType::BOOL};
1251 outExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
1252 OperandType::TENSOR_INT32};
1253 } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM ||
1254 inputType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
1255 inExpectedTypes = {inputType,
1256 inputType,
1257 OperandType::TENSOR_QUANT16_SYMM,
1258 OperandType::TENSOR_QUANT16_ASYMM,
1259 OperandType::FLOAT32,
1260 OperandType::FLOAT32,
1261 OperandType::INT32,
1262 OperandType::INT32,
1263 OperandType::FLOAT32,
1264 OperandType::FLOAT32,
1265 OperandType::BOOL};
1266 outExpectedTypes = {inputType, OperandType::TENSOR_QUANT16_ASYMM,
1267 OperandType::TENSOR_INT32};
1268 } else {
1269 NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
1270 }
1271 NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
1272 NN_RET_CHECK(validateOutputTypes(context, outExpectedTypes));
1273 if (inputType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
1274 return validateHalVersion(context, HalVersion::V1_3);
1275 } else {
1276 return validateHalVersion(context, HalVersion::V1_2);
1277 }
1278 }
1279
prepare(IOperationExecutionContext * context)1280 bool prepare(IOperationExecutionContext* context) {
1281 bool useNchw = context->getInputValue<bool>(kLayoutScalar);
1282 Shape scoreShape = context->getInputShape(kScoreTensor);
1283 Shape bboxDeltasShape = context->getInputShape(kDeltaTensor);
1284 Shape anchorsShape = context->getInputShape(kAnchorTensor);
1285 Shape imageInfoDataShape = context->getInputShape(kImageInfoTensor);
1286 Shape outputScoreShape = context->getOutputShape(kOutputScoreTensor);
1287 Shape outputRoiShape = context->getOutputShape(kOutputRoiTensor);
1288 Shape outputBatchSplitShape = context->getOutputShape(kOutputBatchesTensor);
1289
1290 NN_RET_CHECK_EQ(getNumberOfDimensions(scoreShape), 4);
1291 NN_RET_CHECK_EQ(getNumberOfDimensions(bboxDeltasShape), 4);
1292 NN_RET_CHECK_EQ(getNumberOfDimensions(anchorsShape), 2);
1293 NN_RET_CHECK_EQ(getNumberOfDimensions(imageInfoDataShape), 2);
1294
1295 const uint32_t kRoiDim = 4;
1296 uint32_t numBatches = getSizeOfDimension(scoreShape, 0);
1297 uint32_t height = getSizeOfDimension(scoreShape, useNchw ? 2 : 1);
1298 uint32_t width = getSizeOfDimension(scoreShape, useNchw ? 3 : 2);
1299 uint32_t numAnchors = getSizeOfDimension(scoreShape, useNchw ? 1 : 3);
1300
1301 NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, 0), numBatches);
1302 NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, useNchw ? 2 : 1), height);
1303 NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, useNchw ? 3 : 2), width);
1304 NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, useNchw ? 1 : 3), numAnchors * kRoiDim);
1305 NN_RET_CHECK_EQ(getSizeOfDimension(imageInfoDataShape, 0), numBatches);
1306 NN_RET_CHECK_EQ(getSizeOfDimension(imageInfoDataShape, 1), 2);
1307 NN_RET_CHECK_EQ(getSizeOfDimension(anchorsShape, 0), numAnchors);
1308 NN_RET_CHECK_EQ(getSizeOfDimension(anchorsShape, 1), kRoiDim);
1309
1310 if (scoreShape.type == OperandType::TENSOR_QUANT8_ASYMM) {
1311 NN_RET_CHECK_EQ(anchorsShape.scale, 0.125f);
1312 NN_RET_CHECK_EQ(imageInfoDataShape.scale, 0.125f);
1313 NN_RET_CHECK_EQ(imageInfoDataShape.offset, 0);
1314 }
1315
1316 outputScoreShape.type = scoreShape.type;
1317 outputScoreShape.dimensions = {0};
1318 outputScoreShape.scale = scoreShape.scale;
1319 outputScoreShape.offset = scoreShape.offset;
1320 NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, outputScoreShape));
1321
1322 outputRoiShape.dimensions = {0, 4};
1323 if (scoreShape.type == OperandType::TENSOR_QUANT8_ASYMM) {
1324 outputRoiShape.scale = 0.125f;
1325 outputRoiShape.offset = 0;
1326 }
1327 NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, outputRoiShape));
1328
1329 outputBatchSplitShape.dimensions = {0};
1330 NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, outputBatchSplitShape));
1331 return true;
1332 }
1333
// Dispatches GENERATE_PROPOSALS to the implementation matching the score
// tensor's type. The fp16/fp32 variants read their scalar parameters in the
// same precision as the tensors; both quant8 variants (unsigned and signed)
// take fp32 scalars and share generateProposalsQuant, differing only in the
// score/delta buffer element type.
// NOTE(review): kHeightStrideSalar is a pre-existing misspelling of "Scalar"
// in a constant declared elsewhere in this file; it must be kept as-is here.
bool execute(IOperationExecutionContext* context) {
    NNTRACE_TRANS("generateProposals");
    switch (context->getInputType(kScoreTensor)) {
        case OperandType::TENSOR_FLOAT16: {
            return generateProposalsFloat16(context->getInputBuffer<_Float16>(kScoreTensor),
                                            context->getInputShape(kScoreTensor),
                                            context->getInputBuffer<_Float16>(kDeltaTensor),
                                            context->getInputShape(kDeltaTensor),
                                            context->getInputBuffer<_Float16>(kAnchorTensor),
                                            context->getInputShape(kAnchorTensor),
                                            context->getInputBuffer<_Float16>(kImageInfoTensor),
                                            context->getInputShape(kImageInfoTensor),
                                            context->getInputValue<_Float16>(kHeightStrideSalar),
                                            context->getInputValue<_Float16>(kWidthStrideScalar),
                                            context->getInputValue<int32_t>(kPreNmsMaxScalar),
                                            context->getInputValue<int32_t>(kPostNmsMaxScalar),
                                            context->getInputValue<_Float16>(kIoUThresholdScalar),
                                            context->getInputValue<_Float16>(kMinSizeScalar),
                                            context->getInputValue<bool>(kLayoutScalar), context);
        }
        case OperandType::TENSOR_FLOAT32: {
            return generateProposalsFloat32(context->getInputBuffer<float>(kScoreTensor),
                                            context->getInputShape(kScoreTensor),
                                            context->getInputBuffer<float>(kDeltaTensor),
                                            context->getInputShape(kDeltaTensor),
                                            context->getInputBuffer<float>(kAnchorTensor),
                                            context->getInputShape(kAnchorTensor),
                                            context->getInputBuffer<float>(kImageInfoTensor),
                                            context->getInputShape(kImageInfoTensor),
                                            context->getInputValue<float>(kHeightStrideSalar),
                                            context->getInputValue<float>(kWidthStrideScalar),
                                            context->getInputValue<int32_t>(kPreNmsMaxScalar),
                                            context->getInputValue<int32_t>(kPostNmsMaxScalar),
                                            context->getInputValue<float>(kIoUThresholdScalar),
                                            context->getInputValue<float>(kMinSizeScalar),
                                            context->getInputValue<bool>(kLayoutScalar), context);
        }
        // Unsigned quant8: uint8_t scores/deltas, int16_t anchors, uint16_t
        // image info.
        case OperandType::TENSOR_QUANT8_ASYMM: {
            return generateProposalsQuant(context->getInputBuffer<uint8_t>(kScoreTensor),
                                          context->getInputShape(kScoreTensor),
                                          context->getInputBuffer<uint8_t>(kDeltaTensor),
                                          context->getInputShape(kDeltaTensor),
                                          context->getInputBuffer<int16_t>(kAnchorTensor),
                                          context->getInputShape(kAnchorTensor),
                                          context->getInputBuffer<uint16_t>(kImageInfoTensor),
                                          context->getInputShape(kImageInfoTensor),
                                          context->getInputValue<float>(kHeightStrideSalar),
                                          context->getInputValue<float>(kWidthStrideScalar),
                                          context->getInputValue<int32_t>(kPreNmsMaxScalar),
                                          context->getInputValue<int32_t>(kPostNmsMaxScalar),
                                          context->getInputValue<float>(kIoUThresholdScalar),
                                          context->getInputValue<float>(kMinSizeScalar),
                                          context->getInputValue<bool>(kLayoutScalar), context);
        }
        // Signed quant8 (HAL 1.3+): same as above but with int8_t
        // scores/deltas.
        case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: {
            return generateProposalsQuant(context->getInputBuffer<int8_t>(kScoreTensor),
                                          context->getInputShape(kScoreTensor),
                                          context->getInputBuffer<int8_t>(kDeltaTensor),
                                          context->getInputShape(kDeltaTensor),
                                          context->getInputBuffer<int16_t>(kAnchorTensor),
                                          context->getInputShape(kAnchorTensor),
                                          context->getInputBuffer<uint16_t>(kImageInfoTensor),
                                          context->getInputShape(kImageInfoTensor),
                                          context->getInputValue<float>(kHeightStrideSalar),
                                          context->getInputValue<float>(kWidthStrideScalar),
                                          context->getInputValue<int32_t>(kPreNmsMaxScalar),
                                          context->getInputValue<int32_t>(kPostNmsMaxScalar),
                                          context->getInputValue<float>(kIoUThresholdScalar),
                                          context->getInputValue<float>(kMinSizeScalar),
                                          context->getInputValue<bool>(kLayoutScalar), context);
        }
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
}
1409
1410 } // namespace generate_proposals
1411
1412 namespace detection_postprocess {
1413
constexpr char kOperationName[] = "DETECTION_POSTPROCESS";

// Input operand indices (shapes below follow the checks in prepare()).
constexpr uint32_t kNumInputs = 14;
constexpr uint32_t kScoreTensor = 0;   // class scores, [batches, numAnchors, numClasses]
constexpr uint32_t kDeltaTensor = 1;   // box deltas, [batches, numAnchors, lengthBoxEncoding]
constexpr uint32_t kAnchorTensor = 2;  // anchors, [numAnchors, 4]
constexpr uint32_t kScaleYScalar = 3;  // divisor applied to the dy delta when decoding
constexpr uint32_t kScaleXScalar = 4;  // divisor applied to the dx delta when decoding
constexpr uint32_t kScaleHScalar = 5;  // divisor applied to the dh delta when decoding
constexpr uint32_t kScaleWScalar = 6;  // divisor applied to the dw delta when decoding
constexpr uint32_t kUseRegularNmsScalar = 7;  // true: per-class NMS; false: fast single-class NMS
constexpr uint32_t kMaxNumDetectionScalar = 8;
constexpr uint32_t kMaxClassesPerDetectionScalar = 9;     // used only by fast NMS
constexpr uint32_t kMaxNumDetectionPerClassScalar = 10;   // used only by regular NMS
constexpr uint32_t kScoreThresholdScalar = 11;
constexpr uint32_t kIoUThresholdScalar = 12;
constexpr uint32_t kIsBGInLabelScalar = 13;  // if false, output class indices skip class 0

// Output operand indices; all sized [batches, maxDetections(, 4)] by prepare().
constexpr uint32_t kNumOutputs = 4;
constexpr uint32_t kOutputScoreTensor = 0;
constexpr uint32_t kOutputRoiTensor = 1;
constexpr uint32_t kOutputClassTensor = 2;
constexpr uint32_t kOutputDetectionTensor = 3;  // per-batch count of valid detections
1437
1438 namespace {
1439
detectionPostprocessFloat32(const float * scoreData,const Shape & scoreShape,const float * deltaData,const Shape & deltaShape,const float * anchorData,const Shape & anchorShape,float scaleY,float scaleX,float scaleH,float scaleW,bool useRegularNms,int32_t maxNumDetections,int32_t maxClassesPerDetection,int32_t maxNumDetectionsPerClass,float iouThreshold,float scoreThreshold,bool isBGInLabel,float * scoreOutData,const Shape & scoreOutShape,float * roiOutData,const Shape & roiOutShape,int32_t * classOutData,const Shape & classOutShape,int32_t * detectionOutData,const Shape & detectionOutShape)1440 bool detectionPostprocessFloat32(
1441 const float* scoreData, const Shape& scoreShape, const float* deltaData,
1442 const Shape& deltaShape, const float* anchorData, const Shape& anchorShape, float scaleY,
1443 float scaleX, float scaleH, float scaleW, bool useRegularNms, int32_t maxNumDetections,
1444 int32_t maxClassesPerDetection, int32_t maxNumDetectionsPerClass, float iouThreshold,
1445 float scoreThreshold, bool isBGInLabel, float* scoreOutData, const Shape& scoreOutShape,
1446 float* roiOutData, const Shape& roiOutShape, int32_t* classOutData,
1447 const Shape& classOutShape, int32_t* detectionOutData, const Shape& detectionOutShape) {
1448 const uint32_t kRoiDim = 4;
1449 uint32_t numBatches = getSizeOfDimension(scoreShape, 0);
1450 uint32_t numAnchors = getSizeOfDimension(scoreShape, 1);
1451 uint32_t numClasses = getSizeOfDimension(scoreShape, 2);
1452 uint32_t lengthBoxEncoding = getSizeOfDimension(deltaShape, 2);
1453 uint32_t numOutDetection = getSizeOfDimension(scoreOutShape, 1);
1454
1455 memset(scoreOutData, 0, getNumberOfElements(scoreOutShape) * sizeof(float));
1456 memset(roiOutData, 0, getNumberOfElements(roiOutShape) * sizeof(float));
1457 memset(classOutData, 0, getNumberOfElements(classOutShape) * sizeof(int32_t));
1458 memset(detectionOutData, 0, getNumberOfElements(detectionOutShape) * sizeof(int32_t));
1459
1460 const float* scoreBase = scoreData;
1461 const float* deltaBase = deltaData;
1462 float* scoreOutBase = scoreOutData;
1463 float* roiOutBase = roiOutData;
1464 int32_t* classOutBase = classOutData;
1465 std::vector<float> roiBuffer(numAnchors * kRoiDim);
1466 std::vector<float> scoreBuffer(numAnchors);
1467 for (uint32_t b = 0; b < numBatches; b++) {
1468 const float* anchorBase = anchorData;
1469 for (uint32_t a = 0; a < numAnchors; a++) {
1470 float yCtr = anchorBase[0] + anchorBase[2] * deltaBase[0] / scaleY;
1471 float xCtr = anchorBase[1] + anchorBase[3] * deltaBase[1] / scaleX;
1472 float hHalf = anchorBase[2] * std::exp(deltaBase[2] / scaleH) * 0.5f;
1473 float wHalf = anchorBase[3] * std::exp(deltaBase[3] / scaleW) * 0.5f;
1474 roiBuffer[a * kRoiDim] = yCtr - hHalf;
1475 roiBuffer[a * kRoiDim + 1] = xCtr - wHalf;
1476 roiBuffer[a * kRoiDim + 2] = yCtr + hHalf;
1477 roiBuffer[a * kRoiDim + 3] = xCtr + wHalf;
1478 anchorBase += kRoiDim;
1479 deltaBase += lengthBoxEncoding;
1480 }
1481
1482 if (useRegularNms) {
1483 std::vector<uint32_t> select;
1484 box_with_nms_limit::hardNmsMultiClass(
1485 scoreBase, numClasses, numAnchors, scoreThreshold, iouThreshold,
1486 maxNumDetections, maxNumDetectionsPerClass,
1487 [&roiBuffer, numClasses](uint32_t ind) {
1488 return roiBuffer.data() + (ind / numClasses) * kRoiDim;
1489 },
1490 &select);
1491 for (uint32_t i = 0; i < select.size(); i++) {
1492 uint32_t ind = select[i];
1493 scoreOutBase[i] = scoreBase[ind];
1494 memcpy(roiOutBase + i * kRoiDim, &roiBuffer[(ind / numClasses) * kRoiDim],
1495 kRoiDim * sizeof(float));
1496 classOutBase[i] = (ind % numClasses) - (isBGInLabel ? 0 : 1);
1497 }
1498 *detectionOutData++ = select.size();
1499 } else {
1500 uint32_t numOutClasses = std::min<uint32_t>(numClasses - 1, maxClassesPerDetection);
1501 std::vector<float> maxScores(numAnchors);
1502 for (uint32_t a = 0; a < numAnchors; a++) {
1503 maxScores[a] = *std::max_element(scoreBase + a * numClasses + 1,
1504 scoreBase + (a + 1) * numClasses);
1505 }
1506 std::vector<uint32_t> select;
1507 for (uint32_t a = 0; a < numAnchors; a++) {
1508 if (maxScores[a] > scoreThreshold) {
1509 select.push_back(a);
1510 }
1511 }
1512 uint32_t* selectEnd = box_with_nms_limit::hardNmsSingleClass(
1513 maxScores.data(), iouThreshold, maxNumDetections,
1514 [&roiBuffer](uint32_t ind) { return roiBuffer.data() + ind * kRoiDim; },
1515 select.data(), select.size());
1516 select.resize(selectEnd - select.data());
1517 float* scoreOutPtr = scoreOutBase;
1518 float* roiOutPtr = roiOutBase;
1519 int32_t* classOutPtr = classOutBase;
1520 for (auto i : select) {
1521 const float* score = scoreBase + i * numClasses;
1522 std::vector<uint32_t> scoreInds(numClasses - 1);
1523 std::iota(scoreInds.begin(), scoreInds.end(), 1);
1524 std::sort(scoreInds.begin(), scoreInds.end(),
1525 [&score](const uint32_t lhs, const uint32_t rhs) {
1526 return score[lhs] > score[rhs];
1527 });
1528 for (uint32_t c = 0; c < numOutClasses; c++) {
1529 *scoreOutPtr++ = score[scoreInds[c]];
1530 memcpy(roiOutPtr, &roiBuffer[i * kRoiDim], kRoiDim * sizeof(float));
1531 roiOutPtr += kRoiDim;
1532 *classOutPtr++ = scoreInds[c] - (isBGInLabel ? 0 : 1);
1533 }
1534 }
1535 *detectionOutData++ = select.size() * numOutClasses;
1536 }
1537 scoreBase += numAnchors * numClasses;
1538 scoreOutBase += numOutDetection;
1539 roiOutBase += numOutDetection * kRoiDim;
1540 classOutBase += numOutDetection;
1541 }
1542 return true;
1543 }
1544
detectionPostprocessFloat16(const _Float16 * scoreData,const Shape & scoreShape,const _Float16 * deltaData,const Shape & deltaShape,const _Float16 * anchorData,const Shape & anchorShape,float scaleY,float scaleX,float scaleH,float scaleW,bool useRegularNms,int32_t maxNumDetections,int32_t maxClassesPerDetection,int32_t maxNumDetectionsPerClass,float iouThreshold,float scoreThreshold,bool isBGInLabel,_Float16 * scoreOutData,const Shape & scoreOutShape,_Float16 * roiOutData,const Shape & roiOutShape,int32_t * classOutData,const Shape & classOutShape,int32_t * detectionOutData,const Shape & detectionOutShape)1545 bool detectionPostprocessFloat16(
1546 const _Float16* scoreData, const Shape& scoreShape, const _Float16* deltaData,
1547 const Shape& deltaShape, const _Float16* anchorData, const Shape& anchorShape, float scaleY,
1548 float scaleX, float scaleH, float scaleW, bool useRegularNms, int32_t maxNumDetections,
1549 int32_t maxClassesPerDetection, int32_t maxNumDetectionsPerClass, float iouThreshold,
1550 float scoreThreshold, bool isBGInLabel, _Float16* scoreOutData, const Shape& scoreOutShape,
1551 _Float16* roiOutData, const Shape& roiOutShape, int32_t* classOutData,
1552 const Shape& classOutShape, int32_t* detectionOutData, const Shape& detectionOutShape) {
1553 std::vector<float> scores_float32(getNumberOfElements(scoreShape));
1554 convertFloat16ToFloat32(scoreData, &scores_float32);
1555 std::vector<float> delta_float32(getNumberOfElements(deltaShape));
1556 convertFloat16ToFloat32(deltaData, &delta_float32);
1557 std::vector<float> anchor_float32(getNumberOfElements(anchorShape));
1558 convertFloat16ToFloat32(anchorData, &anchor_float32);
1559 std::vector<float> outputScore_float32(getNumberOfElements(scoreOutShape));
1560 std::vector<float> outputRoi_float32(getNumberOfElements(roiOutShape));
1561 NN_RET_CHECK(detectionPostprocessFloat32(
1562 scores_float32.data(), scoreShape, delta_float32.data(), deltaShape,
1563 anchor_float32.data(), anchorShape, scaleY, scaleX, scaleH, scaleW, useRegularNms,
1564 maxNumDetections, maxClassesPerDetection, maxNumDetectionsPerClass, iouThreshold,
1565 scoreThreshold, isBGInLabel, outputScore_float32.data(), scoreOutShape,
1566 outputRoi_float32.data(), roiOutShape, classOutData, classOutShape, detectionOutData,
1567 detectionOutShape));
1568 convertFloat32ToFloat16(outputScore_float32, scoreOutData);
1569 convertFloat32ToFloat16(outputRoi_float32, roiOutData);
1570 return true;
1571 }
1572
1573 } // namespace
1574
validate(const IOperationValidationContext * context)1575 bool validate(const IOperationValidationContext* context) {
1576 NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
1577 NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
1578 std::vector<OperandType> inExpectedTypes;
1579 std::vector<OperandType> outExpectedTypes;
1580 auto inputType = context->getInputType(kScoreTensor);
1581 if (inputType == OperandType::TENSOR_FLOAT16) {
1582 inExpectedTypes = {OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16,
1583 OperandType::TENSOR_FLOAT16, OperandType::FLOAT16,
1584 OperandType::FLOAT16, OperandType::FLOAT16,
1585 OperandType::FLOAT16, OperandType::BOOL,
1586 OperandType::INT32, OperandType::INT32,
1587 OperandType::INT32, OperandType::FLOAT16,
1588 OperandType::FLOAT16, OperandType::BOOL};
1589 } else if (inputType == OperandType::TENSOR_FLOAT32) {
1590 inExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
1591 OperandType::TENSOR_FLOAT32, OperandType::FLOAT32,
1592 OperandType::FLOAT32, OperandType::FLOAT32,
1593 OperandType::FLOAT32, OperandType::BOOL,
1594 OperandType::INT32, OperandType::INT32,
1595 OperandType::INT32, OperandType::FLOAT32,
1596 OperandType::FLOAT32, OperandType::BOOL};
1597 } else {
1598 NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
1599 }
1600 NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
1601 NN_RET_CHECK(validateOutputTypes(
1602 context, {inputType, inputType, OperandType::TENSOR_INT32, OperandType::TENSOR_INT32}));
1603 return validateHalVersion(context, HalVersion::V1_2);
1604 }
1605
// Validates input ranks/dimensions and the scalar parameters, then sets the
// fixed output shapes: every output is sized for the maximum possible number
// of detections per batch.
bool prepare(IOperationExecutionContext* context) {
    Shape scoreShape = context->getInputShape(kScoreTensor);
    Shape deltasShape = context->getInputShape(kDeltaTensor);
    Shape anchorsShape = context->getInputShape(kAnchorTensor);
    Shape outputScoreShape = context->getOutputShape(kOutputScoreTensor);
    Shape outputRoiShape = context->getOutputShape(kOutputRoiTensor);
    Shape outputClassShape = context->getOutputShape(kOutputClassTensor);
    Shape outputDetectionShape = context->getOutputShape(kOutputDetectionTensor);

    NN_RET_CHECK_EQ(getNumberOfDimensions(scoreShape), 3);
    NN_RET_CHECK_EQ(getNumberOfDimensions(deltasShape), 3);
    NN_RET_CHECK_EQ(getNumberOfDimensions(anchorsShape), 2);

    // Each box is described by 4 values.
    const uint32_t kRoiDim = 4;
    uint32_t numBatches = getSizeOfDimension(scoreShape, 0);
    uint32_t numAnchors = getSizeOfDimension(scoreShape, 1);
    uint32_t numClasses = getSizeOfDimension(scoreShape, 2);
    uint32_t lengthBoxEncoding = getSizeOfDimension(deltasShape, 2);
    uint32_t maxNumDetections = context->getInputValue<int32_t>(kMaxNumDetectionScalar);
    uint32_t maxClassesPerDetection =
            context->getInputValue<int32_t>(kMaxClassesPerDetectionScalar);
    uint32_t numOutDetections = maxNumDetections;

    NN_RET_CHECK_EQ(getSizeOfDimension(deltasShape, 0), numBatches);
    NN_RET_CHECK_EQ(getSizeOfDimension(deltasShape, 1), numAnchors);
    NN_RET_CHECK_EQ(getSizeOfDimension(anchorsShape, 0), numAnchors);
    NN_RET_CHECK_EQ(getSizeOfDimension(anchorsShape, 1), kRoiDim);

    // Scale factors must be positive and thresholds non-negative; the checks
    // read the scalars in the precision matching the tensor type.
    if (scoreShape.type == OperandType::TENSOR_FLOAT32) {
        NN_RET_CHECK_GT(context->getInputValue<float>(kScaleYScalar), 0);
        NN_RET_CHECK_GT(context->getInputValue<float>(kScaleXScalar), 0);
        NN_RET_CHECK_GT(context->getInputValue<float>(kScaleHScalar), 0);
        NN_RET_CHECK_GT(context->getInputValue<float>(kScaleWScalar), 0);
        NN_RET_CHECK_GE(context->getInputValue<float>(kScoreThresholdScalar), 0);
        NN_RET_CHECK_GE(context->getInputValue<float>(kIoUThresholdScalar), 0);
    } else if (scoreShape.type == OperandType::TENSOR_FLOAT16) {
        NN_RET_CHECK(context->getInputValue<_Float16>(kScaleYScalar) > 0);
        NN_RET_CHECK(context->getInputValue<_Float16>(kScaleXScalar) > 0);
        NN_RET_CHECK(context->getInputValue<_Float16>(kScaleHScalar) > 0);
        NN_RET_CHECK(context->getInputValue<_Float16>(kScaleWScalar) > 0);
        NN_RET_CHECK(context->getInputValue<_Float16>(kScoreThresholdScalar) >= 0);
        NN_RET_CHECK(context->getInputValue<_Float16>(kIoUThresholdScalar) >= 0);
    } else {
        NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
    // numClasses must exceed 1 (the execute path treats class 0 as background
    // and scores at least one foreground class).
    NN_RET_CHECK_GT(numClasses, 1);
    NN_RET_CHECK_GE(lengthBoxEncoding, 4);
    NN_RET_CHECK_GT(maxNumDetections, 0);
    if (context->getInputValue<bool>(kUseRegularNmsScalar)) {
        NN_RET_CHECK_GT(context->getInputValue<int32_t>(kMaxNumDetectionPerClassScalar), 0);
    } else {
        // Fast NMS can emit several classes per kept box, so the output slots
        // scale accordingly.
        NN_RET_CHECK_GT(maxClassesPerDetection, 0);
        numOutDetections *= maxClassesPerDetection;
    }

    outputScoreShape.type = scoreShape.type;
    outputScoreShape.dimensions = {numBatches, numOutDetections};
    NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, outputScoreShape));

    outputRoiShape.type = anchorsShape.type;
    outputRoiShape.dimensions = {numBatches, numOutDetections, 4};
    NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, outputRoiShape));

    outputClassShape.type = OperandType::TENSOR_INT32;
    outputClassShape.dimensions = {numBatches, numOutDetections};
    NN_RET_CHECK(context->setOutputShape(kOutputClassTensor, outputClassShape));

    // One detection count per batch.
    outputDetectionShape.type = OperandType::TENSOR_INT32;
    outputDetectionShape.dimensions = {numBatches};
    NN_RET_CHECK(context->setOutputShape(kOutputDetectionTensor, outputDetectionShape));
    return true;
}
1678
// Dispatches DETECTION_POSTPROCESSING to the fp16 or fp32 implementation based
// on the score tensor's type; all buffers, shapes, and scalar parameters are
// forwarded from the execution context.
bool execute(IOperationExecutionContext* context) {
    NNTRACE_TRANS("detectionPostProcess");
    switch (context->getInputType(kScoreTensor)) {
        case OperandType::TENSOR_FLOAT16: {
            return detectionPostprocessFloat16(
                    context->getInputBuffer<_Float16>(kScoreTensor),
                    context->getInputShape(kScoreTensor),
                    context->getInputBuffer<_Float16>(kDeltaTensor),
                    context->getInputShape(kDeltaTensor),
                    context->getInputBuffer<_Float16>(kAnchorTensor),
                    context->getInputShape(kAnchorTensor),
                    context->getInputValue<_Float16>(kScaleYScalar),
                    context->getInputValue<_Float16>(kScaleXScalar),
                    context->getInputValue<_Float16>(kScaleHScalar),
                    context->getInputValue<_Float16>(kScaleWScalar),
                    context->getInputValue<bool>(kUseRegularNmsScalar),
                    context->getInputValue<int32_t>(kMaxNumDetectionScalar),
                    context->getInputValue<int32_t>(kMaxClassesPerDetectionScalar),
                    context->getInputValue<int32_t>(kMaxNumDetectionPerClassScalar),
                    context->getInputValue<_Float16>(kIoUThresholdScalar),
                    context->getInputValue<_Float16>(kScoreThresholdScalar),
                    context->getInputValue<bool>(kIsBGInLabelScalar),
                    context->getOutputBuffer<_Float16>(kOutputScoreTensor),
                    context->getOutputShape(kOutputScoreTensor),
                    context->getOutputBuffer<_Float16>(kOutputRoiTensor),
                    context->getOutputShape(kOutputRoiTensor),
                    context->getOutputBuffer<int32_t>(kOutputClassTensor),
                    context->getOutputShape(kOutputClassTensor),
                    context->getOutputBuffer<int32_t>(kOutputDetectionTensor),
                    context->getOutputShape(kOutputDetectionTensor));
        }
        case OperandType::TENSOR_FLOAT32: {
            return detectionPostprocessFloat32(
                    context->getInputBuffer<float>(kScoreTensor),
                    context->getInputShape(kScoreTensor),
                    context->getInputBuffer<float>(kDeltaTensor),
                    context->getInputShape(kDeltaTensor),
                    context->getInputBuffer<float>(kAnchorTensor),
                    context->getInputShape(kAnchorTensor),
                    context->getInputValue<float>(kScaleYScalar),
                    context->getInputValue<float>(kScaleXScalar),
                    context->getInputValue<float>(kScaleHScalar),
                    context->getInputValue<float>(kScaleWScalar),
                    context->getInputValue<bool>(kUseRegularNmsScalar),
                    context->getInputValue<int32_t>(kMaxNumDetectionScalar),
                    context->getInputValue<int32_t>(kMaxClassesPerDetectionScalar),
                    context->getInputValue<int32_t>(kMaxNumDetectionPerClassScalar),
                    context->getInputValue<float>(kIoUThresholdScalar),
                    context->getInputValue<float>(kScoreThresholdScalar),
                    context->getInputValue<bool>(kIsBGInLabelScalar),
                    context->getOutputBuffer<float>(kOutputScoreTensor),
                    context->getOutputShape(kOutputScoreTensor),
                    context->getOutputBuffer<float>(kOutputRoiTensor),
                    context->getOutputShape(kOutputRoiTensor),
                    context->getOutputBuffer<int32_t>(kOutputClassTensor),
                    context->getOutputShape(kOutputClassTensor),
                    context->getOutputBuffer<int32_t>(kOutputDetectionTensor),
                    context->getOutputShape(kOutputDetectionTensor));
        }
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
}
1742
1743 } // namespace detection_postprocess
1744
1745 } // namespace bbox_ops
1746
// Registrations for the bounding-box operations defined in this file.
// AXIS_ALIGNED_BBOX_TRANSFORM and BOX_WITH_NMS_LIMIT opt into zero-sized
// input support; GENERATE_PROPOSALS and DETECTION_POSTPROCESSING do not.
NN_REGISTER_OPERATION(AXIS_ALIGNED_BBOX_TRANSFORM,
                      bbox_ops::axis_aligned_bbox_transform::kOperationName,
                      bbox_ops::axis_aligned_bbox_transform::validate,
                      bbox_ops::axis_aligned_bbox_transform::prepare,
                      bbox_ops::axis_aligned_bbox_transform::execute, .allowZeroSizedInput = true);

NN_REGISTER_OPERATION(BOX_WITH_NMS_LIMIT, bbox_ops::box_with_nms_limit::kOperationName,
                      bbox_ops::box_with_nms_limit::validate, bbox_ops::box_with_nms_limit::prepare,
                      bbox_ops::box_with_nms_limit::execute, .allowZeroSizedInput = true);

NN_REGISTER_OPERATION(GENERATE_PROPOSALS, bbox_ops::generate_proposals::kOperationName,
                      bbox_ops::generate_proposals::validate, bbox_ops::generate_proposals::prepare,
                      bbox_ops::generate_proposals::execute);

NN_REGISTER_OPERATION(DETECTION_POSTPROCESSING, bbox_ops::detection_postprocess::kOperationName,
                      bbox_ops::detection_postprocess::validate,
                      bbox_ops::detection_postprocess::prepare,
                      bbox_ops::detection_postprocess::execute);
1765 } // namespace nn
1766 } // namespace android
1767