1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Operations"
18 
19 #include "Multinomial.h"
20 
21 #include "CpuExecutor.h"
22 #include "CpuOperationUtils.h"
23 #include "HalInterfaces.h"
24 #include "Tracing.h"
25 
26 #include "guarded_philox_random.h"
27 #include "philox_random.h"
28 #include "simple_philox.h"
29 
30 #include <algorithm>
31 #include <limits>
32 #include <unsupported/Eigen/CXX11/Tensor>
33 #include <vector>
34 
35 namespace android {
36 namespace nn {
37 
38 namespace {
39 
40 using namespace hal;
41 
42 template <typename T>
GetBuffer(RunTimeOperandInfo * operand)43 inline T* GetBuffer(RunTimeOperandInfo* operand) {
44     return reinterpret_cast<T*>(operand->buffer);
45 }
46 
47 template <typename T>
GetBuffer(const RunTimeOperandInfo * operand)48 inline const T* GetBuffer(const RunTimeOperandInfo* operand) {
49     return reinterpret_cast<const T*>(operand->buffer);
50 }
51 
52 }  // namespace
53 
Multinomial(const Operation & operation,RunTimeOperandInfo * operands)54 Multinomial::Multinomial(const Operation& operation, RunTimeOperandInfo* operands) {
55     NNTRACE_TRANS("Multinomial::Multinomial");
56     input_ = GetInput(operation, operands, kInputTensor);
57     sample_count_ = getScalarData<int>(*GetInput(operation, operands, kSampleCountParam));
58     random_seeds_ = GetInput(operation, operands, kRandomSeedsTensor);
59 
60     output_ = GetOutput(operation, operands, kOutputTensor);
61 }
62 
Prepare(const Operation & operation,RunTimeOperandInfo * operands,Shape * outputShape)63 bool Multinomial::Prepare(const Operation& operation, RunTimeOperandInfo* operands,
64                           Shape* outputShape) {
65     NNTRACE_TRANS("Multinomial::Prepare");
66     NN_CHECK_EQ(NumInputsWithValues(operation, operands), 3);
67     NN_CHECK_EQ(NumOutputs(operation), 1);
68 
69     const RunTimeOperandInfo* input = GetInput(operation, operands, Multinomial::kInputTensor);
70     const Shape& inputShape = input->shape();
71 
72     const uint32_t batch_size = SizeOfDimension(input, 0);
73     const uint32_t sample_count =
74             getScalarData<int>(*GetInput(operation, operands, kSampleCountParam));
75 
76     outputShape->type = OperandType::TENSOR_INT32;
77     outputShape->dimensions = {batch_size, sample_count};
78     outputShape->offset = inputShape.offset;
79     outputShape->scale = inputShape.scale;
80 
81     return true;
82 }
83 
Eval()84 bool Multinomial::Eval() {
85     NNTRACE_COMP("Multinomial::Eval");
86     switch (input_->type) {
87         case OperandType::TENSOR_FLOAT16: {
88             std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
89             convertFloat16ToFloat32(GetBuffer<_Float16>(input_), &inputDataFloat32);
90             EvalFloat32(inputDataFloat32.data());
91             break;
92         }
93         case OperandType::TENSOR_FLOAT32: {
94             EvalFloat32(GetBuffer<float>(input_));
95             break;
96         }
97         default: {
98             LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
99             return false;
100         }
101     }
102     return true;
103 }
104 
EvalFloat32(const float * inputData)105 void Multinomial::EvalFloat32(const float* inputData) {
106     const int batch_size = SizeOfDimension(input_, 0);
107     const int class_size = SizeOfDimension(input_, 1);
108 
109     tensorflow::GuardedPhiloxRandom random_generator;
110     int32_t* seeds = GetBuffer<int32_t>(random_seeds_);
111     random_generator.Init(seeds[0], seeds[1]);
112 
113     // PhiloxRandom produces results as 4 32-bit integers.
114     int sample_count_aligned = (sample_count_ + 3) / 4 * 4;
115     // The CPU operation uses 64-bit double values, so two results per sample.
116     sample_count_aligned *= 2;
117     auto random_generator_reserved =
118             random_generator.ReserveRandomOutputs(batch_size * sample_count_aligned, 256);
119     tensorflow::random::SimplePhilox simple_philox(&random_generator_reserved);
120 
121     for (uint64_t b = 0; b < batch_size; ++b) {
122         const float* input_ptr_batch = inputData + b * class_size;
123         float max = std::numeric_limits<float>::lowest();
124         for (uint64_t j = 0; j < class_size; ++j) {
125             if (Eigen::numext::isfinite(input_ptr_batch[j])) {
126                 max = std::max(max, input_ptr_batch[j]);
127             }
128         }
129         const double batch_max = static_cast<double>(max);
130         double total = 0;
131         std::vector<double> cdf;
132         cdf.resize(class_size);
133         for (uint64_t j = 0; j < class_size; ++j) {
134             if (Eigen::numext::isfinite(static_cast<float>(input_ptr_batch[j]))) {
135                 total += exp(static_cast<double>(input_ptr_batch[j]) - batch_max);
136             }
137             cdf[j] = total;
138         }
139 
140         auto* output_ptr_batch = GetBuffer<int32_t>(output_) + b * sample_count_;
141         for (uint64_t j = 0; j < sample_count_; ++j) {
142             const double target = simple_philox.RandDouble() * total;
143             auto found_iter = std::upper_bound(cdf.begin(), cdf.end(), target);
144             output_ptr_batch[j] = std::distance(cdf.begin(), found_iter);
145         }
146     }
147 }
148 
149 }  // namespace nn
150 }  // namespace android
151