1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Operations"
18 
19 #include <tensorflow/lite/kernels/internal/common.h>
20 
21 #include <algorithm>
22 #include <cfloat>
23 #include <cmath>
24 #include <vector>
25 
26 #include "CpuOperationUtils.h"
27 #include "Operations.h"
28 #include "Tracing.h"
29 
30 namespace android {
31 namespace nn {
32 
// Extracts the NHWC dimensions shared by every grouped-convolution kernel in
// this file. Expects `inputShape`, `filterShape`, `outputShape`, and
// `numGroups` to be in scope at the expansion site. The filter tensor layout
// is [outputDepth, filterHeight, filterWidth, filterDepth] — its dimension 0
// is not read here. `outputGroupDepth` is the number of output channels each
// group produces; outputDepth is presumably validated upstream to be a
// multiple of numGroups (integer division would silently truncate otherwise —
// TODO confirm against the op's validation code).
#define ANDROID_NN_GROUPED_CONV_PARAMETERS                      \
    uint32_t numBatches = getSizeOfDimension(inputShape, 0);    \
    uint32_t inputHeight = getSizeOfDimension(inputShape, 1);   \
    uint32_t inputWidth = getSizeOfDimension(inputShape, 2);    \
    uint32_t inputDepth = getSizeOfDimension(inputShape, 3);    \
    uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \
    uint32_t filterWidth = getSizeOfDimension(filterShape, 2);  \
    uint32_t filterDepth = getSizeOfDimension(filterShape, 3);  \
    uint32_t outputHeight = getSizeOfDimension(outputShape, 1); \
    uint32_t outputWidth = getSizeOfDimension(outputShape, 2);  \
    uint32_t outputDepth = getSizeOfDimension(outputShape, 3);  \
    uint32_t outputGroupDepth = outputDepth / numGroups;
45 
groupedConvFloat32(const float * inputData,const Shape & inputShape,const float * filterData,const Shape & filterShape,const float * biasData,const Shape & biasShape,int32_t padding_left,int32_t padding_right,int32_t padding_top,int32_t padding_bottom,int32_t stride_width,int32_t stride_height,int32_t numGroups,int32_t activation,float * outputData,const Shape & outputShape)46 bool groupedConvFloat32(const float* inputData, const Shape& inputShape, const float* filterData,
47                         const Shape& filterShape, const float* biasData, const Shape& biasShape,
48                         int32_t padding_left, int32_t padding_right, int32_t padding_top,
49                         int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
50                         int32_t numGroups, int32_t activation, float* outputData,
51                         const Shape& outputShape) {
52     NNTRACE_TRANS("groupConvFloat32");
53     ANDROID_NN_GROUPED_CONV_PARAMETERS
54 
55     float output_activation_min = 0.0f, output_activation_max = 0.0f;
56     CalculateActivationRangeFloat(activation, &output_activation_min, &output_activation_max);
57 
58     const float* inputBase = inputData;
59     float* outPtr = outputData;
60     for (uint32_t b = 0; b < numBatches; b++) {
61         for (uint32_t h = 0; h < outputHeight; h++) {
62             for (uint32_t w = 0; w < outputWidth; w++) {
63                 const float* filterBase = filterData;
64                 for (uint32_t g = 0; g < numGroups; g++) {
65                     for (uint32_t d = 0; d < outputGroupDepth; d++) {
66                         int32_t wInputOrigin =
67                                 static_cast<int32_t>(w) * stride_width - padding_left;
68                         int32_t hInputOrigin =
69                                 static_cast<int32_t>(h) * stride_height - padding_top;
70                         float sum = 0.0f;
71                         for (uint32_t i = 0; i < filterHeight; i++) {
72                             for (uint32_t j = 0; j < filterWidth; j++) {
73                                 for (uint32_t k = 0; k < filterDepth; k++) {
74                                     int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
75                                     int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
76                                     uint32_t dInput = filterDepth * g + k;
77                                     if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
78                                         wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
79                                         uint32_t filterIndex =
80                                                 i * filterWidth * filterDepth + j * filterDepth + k;
81                                         uint32_t inputIndex = hInput * inputWidth * inputDepth +
82                                                               wInput * inputDepth + dInput;
83                                         sum += filterBase[filterIndex] * inputBase[inputIndex];
84                                     }
85                                 }
86                             }
87                         }
88                         sum += biasData[g * outputGroupDepth + d];
89                         sum = std::max(std::min(sum, output_activation_max), output_activation_min);
90                         outPtr[d] = sum;
91                         filterBase += filterHeight * filterWidth * filterDepth;
92                     }
93                     outPtr += outputGroupDepth;
94                 }
95             }
96         }
97         inputBase += inputHeight * inputWidth * inputDepth;
98     }
99 
100     return true;
101 }
102 
103 template <typename T>
groupedConvQuant8(const T * inputData,const Shape & inputShape,const T * filterData,const Shape & filterShape,const int32_t * biasData,const Shape & biasShape,int32_t padding_left,int32_t padding_right,int32_t padding_top,int32_t padding_bottom,int32_t stride_width,int32_t stride_height,int32_t numGroups,int32_t activation,T * outputData,const Shape & outputShape)104 bool groupedConvQuant8(const T* inputData, const Shape& inputShape, const T* filterData,
105                        const Shape& filterShape, const int32_t* biasData, const Shape& biasShape,
106                        int32_t padding_left, int32_t padding_right, int32_t padding_top,
107                        int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
108                        int32_t numGroups, int32_t activation, T* outputData,
109                        const Shape& outputShape) {
110     NNTRACE_TRANS("groupConvQuant8");
111     ANDROID_NN_GROUPED_CONV_PARAMETERS
112 
113     int32_t inputOffset = -inputShape.offset;
114     int32_t filterOffset = -filterShape.offset;
115     int32_t outputOffset = outputShape.offset;
116 
117     double realMultiplier = 0.0;
118     int32_t outputMultiplier = 0;
119     int32_t outputShift = 0;
120     NN_RET_CHECK(GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape, outputShape,
121                                                   &realMultiplier));
122     int exponent;
123     NN_RET_CHECK(QuantizeMultiplier(realMultiplier, &outputMultiplier, &exponent));
124     outputShift = -exponent;
125 
126     int32_t output_activation_min = 0, output_activation_max = 0;
127     CalculateActivationRange<T>(activation, outputShape, &output_activation_min,
128                                 &output_activation_max);
129 
130     const T* inputBase = inputData;
131     T* outPtr = outputData;
132     for (uint32_t b = 0; b < numBatches; b++) {
133         for (uint32_t h = 0; h < outputHeight; h++) {
134             for (uint32_t w = 0; w < outputWidth; w++) {
135                 const T* filterBase = filterData;
136                 for (uint32_t g = 0; g < numGroups; g++) {
137                     for (uint32_t d = 0; d < outputGroupDepth; d++) {
138                         int32_t wInputOrigin =
139                                 static_cast<int32_t>(w) * stride_width - padding_left;
140                         int32_t hInputOrigin =
141                                 static_cast<int32_t>(h) * stride_height - padding_top;
142                         int32_t sum = 0.0f;
143                         for (uint32_t i = 0; i < filterHeight; i++) {
144                             for (uint32_t j = 0; j < filterWidth; j++) {
145                                 for (uint32_t k = 0; k < filterDepth; k++) {
146                                     int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
147                                     int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
148                                     uint32_t dInput = filterDepth * g + k;
149                                     if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
150                                         wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
151                                         uint32_t filterIndex =
152                                                 i * filterWidth * filterDepth + j * filterDepth + k;
153                                         uint32_t inputIndex = hInput * inputWidth * inputDepth +
154                                                               wInput * inputDepth + dInput;
155                                         sum += (static_cast<int32_t>(filterBase[filterIndex]) +
156                                                 filterOffset) *
157                                                (static_cast<int32_t>(inputBase[inputIndex]) +
158                                                 inputOffset);
159                                     }
160                                 }
161                             }
162                         }
163                         sum += biasData[g * outputGroupDepth + d];
164                         sum = tflite::MultiplyByQuantizedMultiplier(sum, outputMultiplier,
165                                                                     -outputShift);
166                         sum += outputOffset;
167                         sum = std::max(std::min(sum, output_activation_max), output_activation_min);
168                         outPtr[d] = static_cast<T>(sum);
169                         filterBase += filterHeight * filterWidth * filterDepth;
170                     }
171                     outPtr += outputGroupDepth;
172                 }
173             }
174         }
175         inputBase += inputHeight * inputWidth * inputDepth;
176     }
177 
178     return true;
179 }
180 
// Explicit instantiations for the two quantized element types this kernel
// serves: int8_t (TENSOR_QUANT8_ASYMM_SIGNED — presumably; confirm against
// the op's validation code) and uint8_t (TENSOR_QUANT8_ASYMM).
template bool groupedConvQuant8<int8_t>(const int8_t* inputData, const Shape& inputShape,
                                        const int8_t* filterData, const Shape& filterShape,
                                        const int32_t* biasData, const Shape& biasShape,
                                        int32_t padding_left, int32_t padding_right,
                                        int32_t padding_top, int32_t padding_bottom,
                                        int32_t stride_width, int32_t stride_height,
                                        int32_t numGroups, int32_t activation, int8_t* outputData,
                                        const Shape& outputShape);

template bool groupedConvQuant8<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
                                         const uint8_t* filterData, const Shape& filterShape,
                                         const int32_t* biasData, const Shape& biasShape,
                                         int32_t padding_left, int32_t padding_right,
                                         int32_t padding_top, int32_t padding_bottom,
                                         int32_t stride_width, int32_t stride_height,
                                         int32_t numGroups, int32_t activation, uint8_t* outputData,
                                         const Shape& outputShape);
198 
199 template <typename T>
groupedConvQuant8PerChannel(const T * inputData,const Shape & inputShape,const int8_t * filterData,const Shape & filterShape,const float * filterScales,const int32_t * biasData,const Shape & biasShape,int32_t padding_left,int32_t padding_right,int32_t padding_top,int32_t padding_bottom,int32_t stride_width,int32_t stride_height,int32_t numGroups,int32_t activation,T * outputData,const Shape & outputShape)200 bool groupedConvQuant8PerChannel(const T* inputData, const Shape& inputShape,
201                                  const int8_t* filterData, const Shape& filterShape,
202                                  const float* filterScales, const int32_t* biasData,
203                                  const Shape& biasShape, int32_t padding_left,
204                                  int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
205                                  int32_t stride_width, int32_t stride_height, int32_t numGroups,
206                                  int32_t activation, T* outputData, const Shape& outputShape) {
207     NNTRACE_TRANS("groupConvQuant8");
208     ANDROID_NN_GROUPED_CONV_PARAMETERS
209 
210     int32_t inputOffset = -inputShape.offset;
211     int32_t outputOffset = outputShape.offset;
212 
213     auto realMultiplier = std::vector<double>(outputDepth, .0f);
214     auto outputMultiplier = std::vector<int32_t>(outputDepth, 0);
215     auto outputShift = std::vector<int32_t>(outputDepth, 0);
216 
217     for (int i = 0; i < outputDepth; ++i) {
218         Shape filterChannelShape = filterShape;
219         filterChannelShape.scale = filterScales[i];
220         Shape biasChannelShape = biasShape;
221         biasChannelShape.scale = filterScales[i] * inputShape.scale;
222 
223         NN_RET_CHECK(GetQuantizedConvolutionMultipler(
224                 inputShape, filterChannelShape, biasChannelShape, outputShape, &realMultiplier[i]));
225         int exponent;
226         NN_RET_CHECK(QuantizeMultiplier(realMultiplier[i], &outputMultiplier[i], &exponent));
227         outputShift[i] = -exponent;
228     }
229 
230     int32_t output_activation_min = 0, output_activation_max = 0;
231     CalculateActivationRange<T>(activation, outputShape, &output_activation_min,
232                                 &output_activation_max);
233 
234     const T* inputBase = inputData;
235     T* outPtr = outputData;
236     for (uint32_t b = 0; b < numBatches; b++) {
237         for (uint32_t h = 0; h < outputHeight; h++) {
238             for (uint32_t w = 0; w < outputWidth; w++) {
239                 const int8_t* filterBase = filterData;
240                 for (uint32_t g = 0; g < numGroups; g++) {
241                     for (uint32_t d = 0; d < outputGroupDepth; d++) {
242                         int32_t wInputOrigin =
243                                 static_cast<int32_t>(w) * stride_width - padding_left;
244                         int32_t hInputOrigin =
245                                 static_cast<int32_t>(h) * stride_height - padding_top;
246                         int32_t sum = 0.0f;
247                         for (uint32_t i = 0; i < filterHeight; i++) {
248                             for (uint32_t j = 0; j < filterWidth; j++) {
249                                 for (uint32_t k = 0; k < filterDepth; k++) {
250                                     int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
251                                     int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
252                                     uint32_t dInput = filterDepth * g + k;
253                                     if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
254                                         wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
255                                         uint32_t filterIndex =
256                                                 i * filterWidth * filterDepth + j * filterDepth + k;
257                                         uint32_t inputIndex = hInput * inputWidth * inputDepth +
258                                                               wInput * inputDepth + dInput;
259                                         sum += (static_cast<int32_t>(filterBase[filterIndex])) *
260                                                (static_cast<int32_t>(inputBase[inputIndex]) +
261                                                 inputOffset);
262                                     }
263                                 }
264                             }
265                         }
266                         int channelIndex = g * outputGroupDepth + d;
267                         sum += biasData[channelIndex];
268                         sum = tflite::MultiplyByQuantizedMultiplier(
269                                 sum, outputMultiplier[channelIndex], -outputShift[channelIndex]);
270                         sum += outputOffset;
271                         sum = std::max(std::min(sum, output_activation_max), output_activation_min);
272                         outPtr[d] = static_cast<T>(sum);
273                         filterBase += filterHeight * filterWidth * filterDepth;
274                     }
275                     outPtr += outputGroupDepth;
276                 }
277             }
278         }
279         inputBase += inputHeight * inputWidth * inputDepth;
280     }
281 
282     return true;
283 }
284 
groupedConvFloat16(const _Float16 * inputData,const Shape & inputShape,const _Float16 * filterData,const Shape & filterShape,const _Float16 * biasData,const Shape & biasShape,int32_t padding_left,int32_t padding_right,int32_t padding_top,int32_t padding_bottom,int32_t stride_width,int32_t stride_height,int32_t numGroups,int32_t activation,_Float16 * outputData,const Shape & outputShape)285 bool groupedConvFloat16(const _Float16* inputData, const Shape& inputShape,
286                         const _Float16* filterData, const Shape& filterShape,
287                         const _Float16* biasData, const Shape& biasShape, int32_t padding_left,
288                         int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
289                         int32_t stride_width, int32_t stride_height, int32_t numGroups,
290                         int32_t activation, _Float16* outputData, const Shape& outputShape) {
291     NNTRACE_TRANS("groupConvFloat16");
292 
293     std::vector<float> inputData_float32(getNumberOfElements(inputShape));
294     std::vector<float> filterData_float32(getNumberOfElements(filterShape));
295     std::vector<float> biasData_float32(getNumberOfElements(biasShape));
296     std::vector<float> outputData_float32(getNumberOfElements(outputShape));
297 
298     convertFloat16ToFloat32(inputData, &inputData_float32);
299     convertFloat16ToFloat32(filterData, &filterData_float32);
300     convertFloat16ToFloat32(biasData, &biasData_float32);
301 
302     groupedConvFloat32(inputData_float32.data(), inputShape, filterData_float32.data(), filterShape,
303                        biasData_float32.data(), biasShape, padding_left, padding_right, padding_top,
304                        padding_bottom, stride_width, stride_height, numGroups, activation,
305                        outputData_float32.data(), outputShape);
306     convertFloat32ToFloat16(outputData_float32, outputData);
307 
308     return true;
309 }
310 
// Explicit instantiations of the per-channel kernel for both supported
// input/output element types. Note the filter is always int8_t regardless of
// T, with per-channel scales supplied via filterScales.
template bool groupedConvQuant8PerChannel<uint8_t>(
        const uint8_t* inputData, const Shape& inputShape, const int8_t* filterData,
        const Shape& filterShape, const float* filterScales, const int32_t* biasData,
        const Shape& biasShape, int32_t padding_left, int32_t padding_right, int32_t padding_top,
        int32_t padding_bottom, int32_t stride_width, int32_t stride_height, int32_t numGroups,
        int32_t activation, uint8_t* outputData, const Shape& outputShape);

template bool groupedConvQuant8PerChannel<int8_t>(
        const int8_t* inputData, const Shape& inputShape, const int8_t* filterData,
        const Shape& filterShape, const float* filterScales, const int32_t* biasData,
        const Shape& biasShape, int32_t padding_left, int32_t padding_right, int32_t padding_top,
        int32_t padding_bottom, int32_t stride_width, int32_t stride_height, int32_t numGroups,
        int32_t activation, int8_t* outputData, const Shape& outputShape);
324 
325 #undef ANDROID_NN_GROUPED_CONV_PARAMETERS
326 }  // namespace nn
327 }  // namespace android
328