/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "Operations"

#include "SVDF.h"

#include <algorithm>
#include <cstring>
#include <vector>

#include "CpuExecutor.h"
#include "CpuOperationUtils.h"
#include "HalInterfaces.h"
#include "Tracing.h"

namespace android {
namespace nn {

using namespace hal;

SVDF::SVDF(const Operation& operation, RunTimeOperandInfo* operands) {
    NNTRACE_TRANS("SVDF::SVDF");
    input_ = GetInput(operation, operands, kInputTensor);
    weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);
    weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
    bias_ = GetInput(operation, operands, kBiasTensor);
    state_in_ = GetInput(operation, operands, kStateInTensor);

    const auto& rankOperand = *GetInput(operation, operands, kRankParam);
    params_.rank_ = getScalarDataWithDefault<int>(rankOperand, 0);
    const auto& activationOperand = *GetInput(operation, operands, kActivationParam);
    params_.activation_ = static_cast<TfLiteFusedActivation>(getScalarDataWithDefault<int>(
            activationOperand, TfLiteFusedActivation::kTfLiteActNone));

    state_out_ = GetOutput(operation, operands, kStateOutTensor);
    output_ = GetOutput(operation, operands, kOutputTensor);
}

bool SVDF::Prepare(const Operation& operation, RunTimeOperandInfo* operands, Shape* stateShape,
                   Shape* outputShape) {
    NNTRACE_TRANS("SVDF::Prepare");
    // Check we have all the inputs and outputs we need.
    const int num_inputs = NumInputsWithValues(operation, operands);

    NN_CHECK(num_inputs == 6 || num_inputs == 7);
    constexpr int requiredInputs[] = {
            kInputTensor, kWeightsFeatureTensor, kWeightsTimeTensor, kStateInTensor,
            kRankParam,   kActivationParam,
    };
    for (const int requiredInput : requiredInputs) {
        NN_RET_CHECK(!IsNullInput(GetInput(operation, operands, requiredInput)))
                << "required input " << requiredInput << " is omitted";
    }
    NN_CHECK_EQ(NumOutputs(operation), 2);

    // Check that the scalar operands' buffers are large enough.
    const auto& rankOperand = *GetInput(operation, operands, kRankParam);
    NN_RET_CHECK(rankOperand.length >= sizeof(int));
    const auto& activationOperand = *GetInput(operation, operands, kActivationParam);
    NN_RET_CHECK(activationOperand.length >= sizeof(int));

    const RunTimeOperandInfo* input = GetInput(operation, operands, SVDF::kInputTensor);
    const RunTimeOperandInfo* weights_feature =
            GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
    const RunTimeOperandInfo* weights_time =
            GetInput(operation, operands, SVDF::kWeightsTimeTensor);

    // Check that the tensor parameters are consistent with each other and with
    // the input configuration.
    const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam));
    const uint32_t batch_size = SizeOfDimension(input, 0);
    const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
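    // Each group of `rank` filters contributes to one output unit, so the
    // number of filters must be a multiple of the rank.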
    NN_CHECK_EQ(num_filters % rank, 0);
    const uint32_t num_units = num_filters / rank;
    const uint32_t memory_size = SizeOfDimension(weights_time, 1);
    NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
    NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);

    const RunTimeOperandInfo* bias = GetInput(operation, operands, kBiasTensor);
    if (!IsNullInput(bias)) {
        NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
    }

    // Resize state.
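    // The state keeps, for each batch, num_filters rows of the last memory_size
    // activations, flattened into a single dimension.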
    const Shape& inputShape = input->shape();
    stateShape->type = inputShape.type;
    stateShape->dimensions = {batch_size, memory_size * num_filters};
    stateShape->offset = inputShape.offset;
    stateShape->scale = inputShape.scale;

    // Resize output.
    outputShape->type = inputShape.type;
    outputShape->dimensions = {batch_size, num_units};
    outputShape->offset = inputShape.offset;
    outputShape->scale = inputShape.scale;

    return true;
}

bool SVDF::Eval() {
    NNTRACE_TRANS("SVDF::Eval");
    switch (input_->type) {
        case OperandType::TENSOR_FLOAT16: {
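            // There is no native FP16 kernel: widen all operands to FP32,
            // evaluate with the FP32 implementation, and narrow the results
            // back to FP16.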
            std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer), &inputDataFloat32);
            std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer),
                                    &inputStateDataFloat32);
            std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));
            if (!IsNullInput(bias_)) {
                convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer),
                                        &biasDataFloat32);
            }
            std::vector<float> weightsFeatureDataFloat32(
                    getNumberOfElements(weights_feature_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer),
                                    &weightsFeatureDataFloat32);
            std::vector<float> weightsTimeDataFloat32(getNumberOfElements(weights_time_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer),
                                    &weightsTimeDataFloat32);
            std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));
            std::vector<float> outputStateDataFloat32(getNumberOfElements(state_out_->shape()));

            EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(),
                        biasDataFloat32.data(), weightsFeatureDataFloat32.data(),
                        weightsTimeDataFloat32.data(), outputDataFloat32.data(),
                        outputStateDataFloat32.data());
            convertFloat32ToFloat16(outputDataFloat32,
                                    reinterpret_cast<_Float16*>(output_->buffer));
            convertFloat32ToFloat16(outputStateDataFloat32,
                                    reinterpret_cast<_Float16*>(state_out_->buffer));
            break;
        }
        case OperandType::TENSOR_FLOAT32: {
            EvalFloat32(reinterpret_cast<float*>(input_->buffer),
                        reinterpret_cast<float*>(state_in_->buffer),
                        reinterpret_cast<float*>(bias_->buffer),
                        reinterpret_cast<float*>(weights_feature_->buffer),
                        reinterpret_cast<float*>(weights_time_->buffer),
                        reinterpret_cast<float*>(output_->buffer),
                        reinterpret_cast<float*>(state_out_->buffer));
            break;
        }
        default: {
            LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
            return false;
        }
    }
    return true;
}

void SVDF::EvalFloat32(const float* inputData, const float* inputStateData, const float* biasData,
                       const float* weightsFeatureData, const float* weightsTimeData,
                       float* outputData, float* outputStateData) {
    NNTRACE_COMP("SVDF::EvalFloat32");

    const int rank = params_.rank_;
    const int batch_size = SizeOfDimension(input_, 0);
    const int input_size = SizeOfDimension(input_, 1);
    const int num_filters = SizeOfDimension(weights_feature_, 0);
    const int num_units = num_filters / rank;
    const int memory_size = SizeOfDimension(weights_time_, 1);

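    // The new state is produced in the separate state-out tensor: start from a
    // copy of the incoming state and update it in place.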
    memcpy(outputStateData, inputStateData, sizeof(float) * batch_size * memory_size * num_filters);
    // Compute conv1d(inputs, weights_feature).
    for (int b = 0; b < batch_size; b++) {
        float* state_ptr_batch = outputStateData + b * memory_size * num_filters;
        for (int c = 0; c < num_filters; c++) {
            float* state_ptr = state_ptr_batch + c * memory_size;
            state_ptr[memory_size - 1] = 0.0;
        }
    }
    // The rightmost column of each filter's memory row is used to save the
    // current cycle activation: the multiply-accumulate below adds into the
    // slots that were just zeroed, starting at outputStateData[memory_size - 1]
    // and using a stride equal to memory_size.
    tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
            weightsFeatureData, num_filters, input_size, inputData, batch_size,
            &outputStateData[memory_size - 1], memory_size);

    // Compute matmul(state, weights_time).
    // For each batch, take the dot product of every filter's memory row with the
    // corresponding row of weights_time; the per-filter results (num_filters of
    // them) are written to the scratch buffer.
    float scratch[batch_size * num_filters];
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
        float* scratch_ptr_batch = scratch + b * num_filters;
        tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
                weightsTimeData, state_out_ptr_batch, memory_size, num_filters, scratch_ptr_batch,
                /*result_stride=*/1);
    }

    // Initialize output with bias if provided.
    if (!IsNullInput(bias_)) {
        tflite::tensor_utils::VectorBatchVectorAssign(biasData, num_units, batch_size, outputData);
    } else {
        std::fill_n(outputData, batch_size * num_units, 0.0f);
    }

    // Reduction sum: each output unit is the sum of the results of its group of
    // `rank` filters.
    for (int b = 0; b < batch_size; b++) {
        float* output_ptr_batch = outputData + b * num_units;
        float* scratch_ptr_batch = scratch + b * num_filters;
        tflite::tensor_utils::ReductionSumVector(scratch_ptr_batch, output_ptr_batch, num_units,
                                                 rank);
    }

    // Apply activation.
    for (int b = 0; b < batch_size; b++) {
        float* output_ptr_batch = outputData + b * num_units;
        tflite::tensor_utils::ApplyActivationToVector(output_ptr_batch, num_units,
                                                      params_.activation_, output_ptr_batch);
    }

    // Shift each filter's memory left by one, dropping the oldest activation and
    // zero-filling the last slot for the next cycle's activation.
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
        for (int f = 0; f < num_filters; f++) {
            tflite::tensor_utils::VectorShiftLeft<float>(state_out_ptr_batch, memory_size,
                                                         /*shift_value=*/0.0);
            state_out_ptr_batch += memory_size;
        }
    }
}

}  // namespace nn
}  // namespace android