1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Contains the implementation of the operations.
18 
19 #define LOG_TAG "Operations"
20 
21 #include <vector>
22 
23 #include "CpuOperationUtils.h"
24 #include "Operations.h"
25 
26 #include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
27 #include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
28 
29 #include "Tracing.h"
30 
31 namespace android {
32 namespace nn {
33 
copyData(const void * inputData,const Shape & inputShape,void * outputData,const Shape & outputShape)34 bool copyData(const void* inputData, const Shape& inputShape, void* outputData,
35               const Shape& outputShape) {
36     NNTRACE_COMP("copyData");
37     size_t count = nonExtensionOperandSizeOfData(inputShape.type, inputShape.dimensions);
38     memcpy(outputData, inputData, count);
39     return true;
40 }
41 
42 template <typename T>
depthToSpaceGeneric(const T * inputData,const Shape & inputShape,int32_t blockSize,T * outputData,const Shape & outputShape)43 bool depthToSpaceGeneric(const T* inputData, const Shape& inputShape, int32_t blockSize,
44                          T* outputData, const Shape& outputShape) {
45     NNTRACE_COMP("optimized_ops::DepthToSpace");
46     tflite::optimized_ops::DepthToSpace(inputData, convertShapeToDims(inputShape), blockSize,
47                                         outputData, convertShapeToDims(outputShape));
48     return true;
49 }
50 template bool depthToSpaceGeneric<float>(const float* inputData, const Shape& inputShape,
51                                          int32_t blockSize, float* outputData,
52                                          const Shape& outputShape);
53 template bool depthToSpaceGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
54                                             int32_t blockSize, _Float16* outputData,
55                                             const Shape& outputShape);
56 template bool depthToSpaceGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
57                                            int32_t blockSize, uint8_t* outputData,
58                                            const Shape& outputShape);
59 template bool depthToSpaceGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
60                                           int32_t blockSize, int8_t* outputData,
61                                           const Shape& outputShape);
62 
63 template <typename T>
spaceToDepthGeneric(const T * inputData,const Shape & inputShape,int32_t blockSize,T * outputData,const Shape & outputShape)64 bool spaceToDepthGeneric(const T* inputData, const Shape& inputShape, int32_t blockSize,
65                          T* outputData, const Shape& outputShape) {
66     NNTRACE_COMP("optimized_ops::SpaceToDepth");
67     tflite::optimized_ops::SpaceToDepth(inputData, convertShapeToDims(inputShape), blockSize,
68                                         outputData, convertShapeToDims(outputShape));
69     return true;
70 }
71 template bool spaceToDepthGeneric<float>(const float* inputData, const Shape& inputShape,
72                                          int32_t blockSize, float* outputData,
73                                          const Shape& outputShape);
74 template bool spaceToDepthGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
75                                             int32_t blockSize, _Float16* outputData,
76                                             const Shape& outputShape);
77 template bool spaceToDepthGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
78                                            int32_t blockSize, uint8_t* outputData,
79                                            const Shape& outputShape);
80 template bool spaceToDepthGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
81                                           int32_t blockSize, int8_t* outputData,
82                                           const Shape& outputShape);
83 
84 template <typename T>
padGeneric(const T * inputData,const Shape & inputShape,const int32_t * paddings,T padValue,T * outputData,const Shape & outputShape)85 bool padGeneric(const T* inputData, const Shape& inputShape, const int32_t* paddings, T padValue,
86                 T* outputData, const Shape& outputShape) {
87     NNTRACE_TRANS("padGeneric");
88 
89     // Based on
90     // http://google3/third_party/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h?l=6194&rcl=213557260
91 
92     // TFLite runtime calls are currently fixed at 4 dimensions. Copy inputs so
93     // we can pad them to 4 dims (yes, we are "padding the padding").
94     int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(inputShape));
95     NN_OPS_CHECK(numInputDims <= 4);
96     std::vector<int> leftPaddings(4 - numInputDims, 0);
97     std::vector<int> rightPaddings(4 - numInputDims, 0);
98     for (int32_t i = 0; i < numInputDims; ++i) {
99         leftPaddings.push_back(paddings[i * 2]);
100         rightPaddings.push_back(paddings[i * 2 + 1]);
101     }
102     const int leftBPadding = leftPaddings[0];
103     const int leftHPadding = leftPaddings[1];
104     const int leftWPadding = leftPaddings[2];
105     const int leftDPadding = leftPaddings[3];
106     const int rightBPadding = rightPaddings[0];
107     const int rightHPadding = rightPaddings[1];
108     const int rightWPadding = rightPaddings[2];
109     const int rightDPadding = rightPaddings[3];
110 
111     const auto extInputShape =
112             tflite::RuntimeShape::ExtendedShape(4, convertShapeToTflshape(inputShape));
113     const auto extOutputShape =
114             tflite::RuntimeShape::ExtendedShape(4, convertShapeToTflshape(outputShape));
115 
116     const int outputBatch = extOutputShape.Dims(0);
117     const int outputHeight = extOutputShape.Dims(1);
118     const int outputWidth = extOutputShape.Dims(2);
119     const int outputDepth = extOutputShape.Dims(3);
120 
121     const int inputDepth = extInputShape.Dims(3);
122 
123     NNTRACE_COMP_SWITCH("padGeneric");
124 
125     if (leftBPadding != 0) {
126         tflite::optimized_ops::TypedMemset<T>(
127                 outputData, padValue, leftBPadding * outputHeight * outputWidth * outputDepth);
128     }
129     for (int outB = leftBPadding; outB < outputBatch - rightBPadding; ++outB) {
130         if (leftHPadding != 0) {
131             tflite::optimized_ops::TypedMemset<T>(
132                     outputData + tflite::Offset(extOutputShape, outB, 0, 0, 0), padValue,
133                     leftHPadding * outputWidth * outputDepth);
134         }
135         for (int outH = leftHPadding; outH < outputHeight - rightHPadding; ++outH) {
136             if (leftWPadding != 0) {
137                 tflite::optimized_ops::TypedMemset<T>(
138                         outputData + tflite::Offset(extOutputShape, outB, outH, 0, 0), padValue,
139                         leftWPadding * outputDepth);
140             }
141             for (int outW = leftWPadding; outW < outputWidth - rightWPadding; ++outW) {
142                 if (leftDPadding != 0) {
143                     tflite::optimized_ops::TypedMemset<T>(
144                             outputData + tflite::Offset(extOutputShape, outB, outH, outW, 0),
145                             padValue, leftDPadding);
146                 }
147 
148                 T* out =
149                         outputData + tflite::Offset(extOutputShape, outB, outH, outW, leftDPadding);
150                 const T* in =
151                         inputData + tflite::Offset(extInputShape, outB - leftBPadding,
152                                                    outH - leftHPadding, outW - leftWPadding, 0);
153                 memcpy(out, in, inputDepth * sizeof(T));
154 
155                 if (rightDPadding != 0) {
156                     tflite::optimized_ops::TypedMemset<T>(
157                             outputData + tflite::Offset(extOutputShape, outB, outH, outW,
158                                                         outputDepth - rightDPadding),
159                             padValue, rightDPadding);
160                 }
161             }
162             if (rightWPadding != 0) {
163                 tflite::optimized_ops::TypedMemset<T>(
164                         outputData + tflite::Offset(extOutputShape, outB, outH,
165                                                     outputWidth - rightWPadding, 0),
166                         padValue, rightWPadding * outputDepth);
167             }
168         }
169         if (rightHPadding != 0) {
170             tflite::optimized_ops::TypedMemset<T>(
171                     outputData + tflite::Offset(extOutputShape, outB, outputHeight - rightHPadding,
172                                                 0, 0),
173                     padValue, rightHPadding * outputWidth * outputDepth);
174         }
175     }
176     if (rightBPadding != 0) {
177         tflite::optimized_ops::TypedMemset<T>(
178                 outputData + tflite::Offset(extOutputShape, outputBatch - rightBPadding, 0, 0, 0),
179                 padValue, rightBPadding * outputHeight * outputWidth * outputDepth);
180     }
181 
182     return true;
183 }
184 template bool padGeneric<float>(const float* inputData, const Shape& inputShape,
185                                 const int32_t* paddings, float padValue, float* outputData,
186                                 const Shape& outputShape);
187 template bool padGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
188                                    const int32_t* paddings, _Float16 padValue, _Float16* outputData,
189                                    const Shape& outputShape);
190 template bool padGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
191                                   const int32_t* paddings, uint8_t padValue, uint8_t* outputData,
192                                   const Shape& outputShape);
193 template bool padGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
194                                  const int32_t* paddings, int8_t padValue, int8_t* outputData,
195                                  const Shape& outputShape);
196 
197 template <typename T>
batchToSpaceGeneric(const T * inputData,const Shape & inputShape,const int32_t * blockSize,T * outputData,const Shape & outputShape)198 bool batchToSpaceGeneric(const T* inputData, const Shape& inputShape, const int32_t* blockSize,
199                          T* outputData, const Shape& outputShape) {
200     // Needed by low level implementation, but not really used.
201     tflite::Dims<4> blockSizeDim, cropsDim;
202     const int32 crops[4] = {0, 0, 0, 0};
203     NNTRACE_COMP("optimized_ops::BatchToSpaceND");
204     tflite::optimized_ops::BatchToSpaceND(inputData, convertShapeToDims(inputShape), blockSize,
205                                           blockSizeDim, crops, cropsDim, outputData,
206                                           convertShapeToDims(outputShape));
207     return true;
208 }
209 template bool batchToSpaceGeneric<float>(const float* inputData, const Shape& inputShape,
210                                          const int32_t* blockSize, float* outputData,
211                                          const Shape& outputShape);
212 template bool batchToSpaceGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
213                                             const int32_t* blockSize, _Float16* outputData,
214                                             const Shape& outputShape);
215 template bool batchToSpaceGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
216                                            const int32_t* blockSize, uint8_t* outputData,
217                                            const Shape& outputShape);
218 template bool batchToSpaceGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
219                                           const int32_t* blockSize, int8_t* outputData,
220                                           const Shape& outputShape);
221 
222 template <typename T>
spaceToBatchGeneric(const T * inputData,const Shape & inputShape,const int32_t * blockSize,const int32_t * padding,const Shape & paddingShape,T * outputData,const Shape & outputShape)223 bool spaceToBatchGeneric(const T* inputData, const Shape& inputShape, const int32_t* blockSize,
224                          const int32_t* padding, const Shape& paddingShape, T* outputData,
225                          const Shape& outputShape) {
226     // Needed by low level implementation, but not really used.
227     tflite::RuntimeShape blockSizeDim;
228     NNTRACE_COMP("optimized_ops::SpaceToBatchND");
229     tflite::optimized_ops::SpaceToBatchND(
230             {.output_offset = outputShape.offset}, convertShapeToTflshape(inputShape), inputData,
231             blockSizeDim, blockSize, convertShapeToTflshape(paddingShape), padding,
232             convertShapeToTflshape(outputShape), outputData);
233     return true;
234 }
235 template bool spaceToBatchGeneric<float>(const float* inputData, const Shape& inputShape,
236                                          const int32_t* blockSize, const int32_t* padding,
237                                          const Shape& paddingShape, float* outputData,
238                                          const Shape& outputShape);
239 template bool spaceToBatchGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
240                                             const int32_t* blockSize, const int32_t* padding,
241                                             const Shape& paddingShape, _Float16* outputData,
242                                             const Shape& outputShape);
243 template bool spaceToBatchGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
244                                            const int32_t* blockSize, const int32_t* padding,
245                                            const Shape& paddingShape, uint8_t* outputData,
246                                            const Shape& outputShape);
247 template bool spaceToBatchGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
248                                           const int32_t* blockSize, const int32_t* padding,
249                                           const Shape& paddingShape, int8_t* outputData,
250                                           const Shape& outputShape);
251 
252 }  // namespace nn
253 }  // namespace android
254