1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#define LOG_TAG "Operations"

#include <tensorflow/lite/kernels/internal/common.h>

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <vector>

#include "CpuOperationUtils.h"
#include "Operations.h"
#include "Tracing.h"
29
30 namespace android {
31 namespace nn {
32
// Common NHWC dimension bookkeeping shared by every grouped-convolution
// variant in this file. Expects `inputShape`, `filterShape`, `outputShape`,
// and `numGroups` to be in scope at the expansion site; defines the batch,
// spatial, and depth extents plus the number of output channels per group.
#define ANDROID_NN_GROUPED_CONV_PARAMETERS                      \
    uint32_t numBatches = getSizeOfDimension(inputShape, 0);    \
    uint32_t inputHeight = getSizeOfDimension(inputShape, 1);   \
    uint32_t inputWidth = getSizeOfDimension(inputShape, 2);    \
    uint32_t inputDepth = getSizeOfDimension(inputShape, 3);    \
    uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \
    uint32_t filterWidth = getSizeOfDimension(filterShape, 2);  \
    uint32_t filterDepth = getSizeOfDimension(filterShape, 3);  \
    uint32_t outputHeight = getSizeOfDimension(outputShape, 1); \
    uint32_t outputWidth = getSizeOfDimension(outputShape, 2);  \
    uint32_t outputDepth = getSizeOfDimension(outputShape, 3);  \
    uint32_t outputGroupDepth = outputDepth / numGroups;
45
groupedConvFloat32(const float * inputData,const Shape & inputShape,const float * filterData,const Shape & filterShape,const float * biasData,const Shape & biasShape,int32_t padding_left,int32_t padding_right,int32_t padding_top,int32_t padding_bottom,int32_t stride_width,int32_t stride_height,int32_t numGroups,int32_t activation,float * outputData,const Shape & outputShape)46 bool groupedConvFloat32(const float* inputData, const Shape& inputShape, const float* filterData,
47 const Shape& filterShape, const float* biasData, const Shape& biasShape,
48 int32_t padding_left, int32_t padding_right, int32_t padding_top,
49 int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
50 int32_t numGroups, int32_t activation, float* outputData,
51 const Shape& outputShape) {
52 NNTRACE_TRANS("groupConvFloat32");
53 ANDROID_NN_GROUPED_CONV_PARAMETERS
54
55 float output_activation_min = 0.0f, output_activation_max = 0.0f;
56 CalculateActivationRangeFloat(activation, &output_activation_min, &output_activation_max);
57
58 const float* inputBase = inputData;
59 float* outPtr = outputData;
60 for (uint32_t b = 0; b < numBatches; b++) {
61 for (uint32_t h = 0; h < outputHeight; h++) {
62 for (uint32_t w = 0; w < outputWidth; w++) {
63 const float* filterBase = filterData;
64 for (uint32_t g = 0; g < numGroups; g++) {
65 for (uint32_t d = 0; d < outputGroupDepth; d++) {
66 int32_t wInputOrigin =
67 static_cast<int32_t>(w) * stride_width - padding_left;
68 int32_t hInputOrigin =
69 static_cast<int32_t>(h) * stride_height - padding_top;
70 float sum = 0.0f;
71 for (uint32_t i = 0; i < filterHeight; i++) {
72 for (uint32_t j = 0; j < filterWidth; j++) {
73 for (uint32_t k = 0; k < filterDepth; k++) {
74 int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
75 int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
76 uint32_t dInput = filterDepth * g + k;
77 if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
78 wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
79 uint32_t filterIndex =
80 i * filterWidth * filterDepth + j * filterDepth + k;
81 uint32_t inputIndex = hInput * inputWidth * inputDepth +
82 wInput * inputDepth + dInput;
83 sum += filterBase[filterIndex] * inputBase[inputIndex];
84 }
85 }
86 }
87 }
88 sum += biasData[g * outputGroupDepth + d];
89 sum = std::max(std::min(sum, output_activation_max), output_activation_min);
90 outPtr[d] = sum;
91 filterBase += filterHeight * filterWidth * filterDepth;
92 }
93 outPtr += outputGroupDepth;
94 }
95 }
96 }
97 inputBase += inputHeight * inputWidth * inputDepth;
98 }
99
100 return true;
101 }
102
103 template <typename T>
groupedConvQuant8(const T * inputData,const Shape & inputShape,const T * filterData,const Shape & filterShape,const int32_t * biasData,const Shape & biasShape,int32_t padding_left,int32_t padding_right,int32_t padding_top,int32_t padding_bottom,int32_t stride_width,int32_t stride_height,int32_t numGroups,int32_t activation,T * outputData,const Shape & outputShape)104 bool groupedConvQuant8(const T* inputData, const Shape& inputShape, const T* filterData,
105 const Shape& filterShape, const int32_t* biasData, const Shape& biasShape,
106 int32_t padding_left, int32_t padding_right, int32_t padding_top,
107 int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
108 int32_t numGroups, int32_t activation, T* outputData,
109 const Shape& outputShape) {
110 NNTRACE_TRANS("groupConvQuant8");
111 ANDROID_NN_GROUPED_CONV_PARAMETERS
112
113 int32_t inputOffset = -inputShape.offset;
114 int32_t filterOffset = -filterShape.offset;
115 int32_t outputOffset = outputShape.offset;
116
117 double realMultiplier = 0.0;
118 int32_t outputMultiplier = 0;
119 int32_t outputShift = 0;
120 NN_RET_CHECK(GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape, outputShape,
121 &realMultiplier));
122 int exponent;
123 NN_RET_CHECK(QuantizeMultiplier(realMultiplier, &outputMultiplier, &exponent));
124 outputShift = -exponent;
125
126 int32_t output_activation_min = 0, output_activation_max = 0;
127 CalculateActivationRange<T>(activation, outputShape, &output_activation_min,
128 &output_activation_max);
129
130 const T* inputBase = inputData;
131 T* outPtr = outputData;
132 for (uint32_t b = 0; b < numBatches; b++) {
133 for (uint32_t h = 0; h < outputHeight; h++) {
134 for (uint32_t w = 0; w < outputWidth; w++) {
135 const T* filterBase = filterData;
136 for (uint32_t g = 0; g < numGroups; g++) {
137 for (uint32_t d = 0; d < outputGroupDepth; d++) {
138 int32_t wInputOrigin =
139 static_cast<int32_t>(w) * stride_width - padding_left;
140 int32_t hInputOrigin =
141 static_cast<int32_t>(h) * stride_height - padding_top;
142 int32_t sum = 0.0f;
143 for (uint32_t i = 0; i < filterHeight; i++) {
144 for (uint32_t j = 0; j < filterWidth; j++) {
145 for (uint32_t k = 0; k < filterDepth; k++) {
146 int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
147 int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
148 uint32_t dInput = filterDepth * g + k;
149 if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
150 wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
151 uint32_t filterIndex =
152 i * filterWidth * filterDepth + j * filterDepth + k;
153 uint32_t inputIndex = hInput * inputWidth * inputDepth +
154 wInput * inputDepth + dInput;
155 sum += (static_cast<int32_t>(filterBase[filterIndex]) +
156 filterOffset) *
157 (static_cast<int32_t>(inputBase[inputIndex]) +
158 inputOffset);
159 }
160 }
161 }
162 }
163 sum += biasData[g * outputGroupDepth + d];
164 sum = tflite::MultiplyByQuantizedMultiplier(sum, outputMultiplier,
165 -outputShift);
166 sum += outputOffset;
167 sum = std::max(std::min(sum, output_activation_max), output_activation_min);
168 outPtr[d] = static_cast<T>(sum);
169 filterBase += filterHeight * filterWidth * filterDepth;
170 }
171 outPtr += outputGroupDepth;
172 }
173 }
174 }
175 inputBase += inputHeight * inputWidth * inputDepth;
176 }
177
178 return true;
179 }
180
181 template bool groupedConvQuant8<int8_t>(const int8_t* inputData, const Shape& inputShape,
182 const int8_t* filterData, const Shape& filterShape,
183 const int32_t* biasData, const Shape& biasShape,
184 int32_t padding_left, int32_t padding_right,
185 int32_t padding_top, int32_t padding_bottom,
186 int32_t stride_width, int32_t stride_height,
187 int32_t numGroups, int32_t activation, int8_t* outputData,
188 const Shape& outputShape);
189
190 template bool groupedConvQuant8<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
191 const uint8_t* filterData, const Shape& filterShape,
192 const int32_t* biasData, const Shape& biasShape,
193 int32_t padding_left, int32_t padding_right,
194 int32_t padding_top, int32_t padding_bottom,
195 int32_t stride_width, int32_t stride_height,
196 int32_t numGroups, int32_t activation, uint8_t* outputData,
197 const Shape& outputShape);
198
199 template <typename T>
groupedConvQuant8PerChannel(const T * inputData,const Shape & inputShape,const int8_t * filterData,const Shape & filterShape,const float * filterScales,const int32_t * biasData,const Shape & biasShape,int32_t padding_left,int32_t padding_right,int32_t padding_top,int32_t padding_bottom,int32_t stride_width,int32_t stride_height,int32_t numGroups,int32_t activation,T * outputData,const Shape & outputShape)200 bool groupedConvQuant8PerChannel(const T* inputData, const Shape& inputShape,
201 const int8_t* filterData, const Shape& filterShape,
202 const float* filterScales, const int32_t* biasData,
203 const Shape& biasShape, int32_t padding_left,
204 int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
205 int32_t stride_width, int32_t stride_height, int32_t numGroups,
206 int32_t activation, T* outputData, const Shape& outputShape) {
207 NNTRACE_TRANS("groupConvQuant8");
208 ANDROID_NN_GROUPED_CONV_PARAMETERS
209
210 int32_t inputOffset = -inputShape.offset;
211 int32_t outputOffset = outputShape.offset;
212
213 auto realMultiplier = std::vector<double>(outputDepth, .0f);
214 auto outputMultiplier = std::vector<int32_t>(outputDepth, 0);
215 auto outputShift = std::vector<int32_t>(outputDepth, 0);
216
217 for (int i = 0; i < outputDepth; ++i) {
218 Shape filterChannelShape = filterShape;
219 filterChannelShape.scale = filterScales[i];
220 Shape biasChannelShape = biasShape;
221 biasChannelShape.scale = filterScales[i] * inputShape.scale;
222
223 NN_RET_CHECK(GetQuantizedConvolutionMultipler(
224 inputShape, filterChannelShape, biasChannelShape, outputShape, &realMultiplier[i]));
225 int exponent;
226 NN_RET_CHECK(QuantizeMultiplier(realMultiplier[i], &outputMultiplier[i], &exponent));
227 outputShift[i] = -exponent;
228 }
229
230 int32_t output_activation_min = 0, output_activation_max = 0;
231 CalculateActivationRange<T>(activation, outputShape, &output_activation_min,
232 &output_activation_max);
233
234 const T* inputBase = inputData;
235 T* outPtr = outputData;
236 for (uint32_t b = 0; b < numBatches; b++) {
237 for (uint32_t h = 0; h < outputHeight; h++) {
238 for (uint32_t w = 0; w < outputWidth; w++) {
239 const int8_t* filterBase = filterData;
240 for (uint32_t g = 0; g < numGroups; g++) {
241 for (uint32_t d = 0; d < outputGroupDepth; d++) {
242 int32_t wInputOrigin =
243 static_cast<int32_t>(w) * stride_width - padding_left;
244 int32_t hInputOrigin =
245 static_cast<int32_t>(h) * stride_height - padding_top;
246 int32_t sum = 0.0f;
247 for (uint32_t i = 0; i < filterHeight; i++) {
248 for (uint32_t j = 0; j < filterWidth; j++) {
249 for (uint32_t k = 0; k < filterDepth; k++) {
250 int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
251 int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
252 uint32_t dInput = filterDepth * g + k;
253 if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
254 wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
255 uint32_t filterIndex =
256 i * filterWidth * filterDepth + j * filterDepth + k;
257 uint32_t inputIndex = hInput * inputWidth * inputDepth +
258 wInput * inputDepth + dInput;
259 sum += (static_cast<int32_t>(filterBase[filterIndex])) *
260 (static_cast<int32_t>(inputBase[inputIndex]) +
261 inputOffset);
262 }
263 }
264 }
265 }
266 int channelIndex = g * outputGroupDepth + d;
267 sum += biasData[channelIndex];
268 sum = tflite::MultiplyByQuantizedMultiplier(
269 sum, outputMultiplier[channelIndex], -outputShift[channelIndex]);
270 sum += outputOffset;
271 sum = std::max(std::min(sum, output_activation_max), output_activation_min);
272 outPtr[d] = static_cast<T>(sum);
273 filterBase += filterHeight * filterWidth * filterDepth;
274 }
275 outPtr += outputGroupDepth;
276 }
277 }
278 }
279 inputBase += inputHeight * inputWidth * inputDepth;
280 }
281
282 return true;
283 }
284
groupedConvFloat16(const _Float16 * inputData,const Shape & inputShape,const _Float16 * filterData,const Shape & filterShape,const _Float16 * biasData,const Shape & biasShape,int32_t padding_left,int32_t padding_right,int32_t padding_top,int32_t padding_bottom,int32_t stride_width,int32_t stride_height,int32_t numGroups,int32_t activation,_Float16 * outputData,const Shape & outputShape)285 bool groupedConvFloat16(const _Float16* inputData, const Shape& inputShape,
286 const _Float16* filterData, const Shape& filterShape,
287 const _Float16* biasData, const Shape& biasShape, int32_t padding_left,
288 int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
289 int32_t stride_width, int32_t stride_height, int32_t numGroups,
290 int32_t activation, _Float16* outputData, const Shape& outputShape) {
291 NNTRACE_TRANS("groupConvFloat16");
292
293 std::vector<float> inputData_float32(getNumberOfElements(inputShape));
294 std::vector<float> filterData_float32(getNumberOfElements(filterShape));
295 std::vector<float> biasData_float32(getNumberOfElements(biasShape));
296 std::vector<float> outputData_float32(getNumberOfElements(outputShape));
297
298 convertFloat16ToFloat32(inputData, &inputData_float32);
299 convertFloat16ToFloat32(filterData, &filterData_float32);
300 convertFloat16ToFloat32(biasData, &biasData_float32);
301
302 groupedConvFloat32(inputData_float32.data(), inputShape, filterData_float32.data(), filterShape,
303 biasData_float32.data(), biasShape, padding_left, padding_right, padding_top,
304 padding_bottom, stride_width, stride_height, numGroups, activation,
305 outputData_float32.data(), outputShape);
306 convertFloat32ToFloat16(outputData_float32, outputData);
307
308 return true;
309 }
310
311 template bool groupedConvQuant8PerChannel<uint8_t>(
312 const uint8_t* inputData, const Shape& inputShape, const int8_t* filterData,
313 const Shape& filterShape, const float* filterScales, const int32_t* biasData,
314 const Shape& biasShape, int32_t padding_left, int32_t padding_right, int32_t padding_top,
315 int32_t padding_bottom, int32_t stride_width, int32_t stride_height, int32_t numGroups,
316 int32_t activation, uint8_t* outputData, const Shape& outputShape);
317
318 template bool groupedConvQuant8PerChannel<int8_t>(
319 const int8_t* inputData, const Shape& inputShape, const int8_t* filterData,
320 const Shape& filterShape, const float* filterScales, const int32_t* biasData,
321 const Shape& biasShape, int32_t padding_left, int32_t padding_right, int32_t padding_top,
322 int32_t padding_bottom, int32_t stride_width, int32_t stride_height, int32_t numGroups,
323 int32_t activation, int8_t* outputData, const Shape& outputShape);
324
325 #undef ANDROID_NN_GROUPED_CONV_PARAMETERS
326 } // namespace nn
327 } // namespace android
328