1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "rsCpuCore.h"
18 #include "rsCpuScript.h"
19 #include "rsScriptGroup.h"
20 #include "rsCpuScriptGroup.h"
21
22 #include <vector>
23
24 namespace android {
25 namespace renderscript {
26
CpuScriptGroupImpl(RsdCpuReferenceImpl * ctx,const ScriptGroupBase * sg)27 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) {
28 mCtx = ctx;
29 mSG = (ScriptGroup*)sg;
30 }
31
~CpuScriptGroupImpl()32 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
33
34 }
35
init()36 bool CpuScriptGroupImpl::init() {
37 return true;
38 }
39
setInput(const ScriptKernelID * kid,Allocation * a)40 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
41 }
42
setOutput(const ScriptKernelID * kid,Allocation * a)43 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
44 }
45
46
47 typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo,
48 uint32_t xstart, uint32_t xend,
49 uint32_t outstep);
50
scriptGroupRoot(const RsExpandKernelDriverInfo * kinfo,uint32_t xstart,uint32_t xend,uint32_t outstep)51 void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo,
52 uint32_t xstart, uint32_t xend,
53 uint32_t outstep) {
54
55
56 const ScriptList *sl = (const ScriptList *)kinfo->usr;
57 RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
58
59 const uint32_t oldInStride = mkinfo->inStride[0];
60
61 for (size_t ct = 0; ct < sl->count; ct++) {
62 ScriptGroupRootFunc_t func;
63 func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
64 mkinfo->usr = sl->usrPtrs[ct];
65
66 if (sl->ins[ct]) {
67 rsAssert(kinfo->inLen == 1);
68
69 mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
70
71 mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
72
73 if (sl->inExts[ct]) {
74 mkinfo->inPtr[0] =
75 (mkinfo->inPtr[0] +
76 sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y);
77
78 } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
79 mkinfo->inPtr[0] =
80 (mkinfo->inPtr[0] +
81 sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid);
82 }
83
84 } else {
85 rsAssert(kinfo->inLen == 0);
86
87 mkinfo->inPtr[0] = nullptr;
88 mkinfo->inStride[0] = 0;
89 }
90
91 uint32_t ostep;
92 if (sl->outs[ct]) {
93 mkinfo->outPtr[0] =
94 (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
95
96 ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
97
98 if (sl->outExts[ct]) {
99 mkinfo->outPtr[0] =
100 mkinfo->outPtr[0] +
101 sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y;
102
103 } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
104 mkinfo->outPtr[0] =
105 mkinfo->outPtr[0] +
106 sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid;
107 }
108 } else {
109 mkinfo->outPtr[0] = nullptr;
110 ostep = 0;
111 }
112
113 //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
114 func(kinfo, xstart, xend, ostep);
115 }
116 //ALOGE("script group root");
117
118 mkinfo->inStride[0] = oldInStride;
119 mkinfo->usr = sl;
120 }
121
122
123
execute()124 void CpuScriptGroupImpl::execute() {
125 std::vector<Allocation *> ins;
126 std::vector<uint8_t> inExts;
127 std::vector<Allocation *> outs;
128 std::vector<uint8_t> outExts;
129 std::vector<const ScriptKernelID *> kernels;
130 bool fieldDep = false;
131
132 for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
133 ScriptGroup::Node *n = mSG->mNodes[ct];
134 Script *s = n->mKernels[0]->mScript;
135 if (s->hasObjectSlots()) {
136 // Disable the ScriptGroup optimization if we have global RS
137 // objects that might interfere between kernels.
138 fieldDep = true;
139 }
140
141 //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
142
143 for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
144 if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
145 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
146 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
147 }
148 }
149
150 for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
151 const ScriptKernelID *k = n->mKernels[ct2];
152 Allocation *ain = nullptr;
153 Allocation *aout = nullptr;
154 bool inExt = false;
155 bool outExt = false;
156
157 for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
158 if (n->mInputs[ct3]->mDstKernel.get() == k) {
159 ain = n->mInputs[ct3]->mAlloc.get();
160 break;
161 }
162 }
163 if (ain == nullptr) {
164 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
165 if (mSG->mInputs[ct3]->mKernel == k) {
166 ain = mSG->mInputs[ct3]->mAlloc.get();
167 inExt = true;
168 break;
169 }
170 }
171 }
172
173 for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
174 if (n->mOutputs[ct3]->mSource.get() == k) {
175 aout = n->mOutputs[ct3]->mAlloc.get();
176 if(n->mOutputs[ct3]->mDstField.get() != nullptr) {
177 fieldDep = true;
178 }
179 break;
180 }
181 }
182 if (aout == nullptr) {
183 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
184 if (mSG->mOutputs[ct3]->mKernel == k) {
185 aout = mSG->mOutputs[ct3]->mAlloc.get();
186 outExt = true;
187 break;
188 }
189 }
190 }
191
192 rsAssert((k->mHasKernelOutput == (aout != nullptr)) &&
193 (k->mHasKernelInput == (ain != nullptr)));
194
195 ins.push_back(ain);
196 inExts.push_back(inExt);
197 outs.push_back(aout);
198 outExts.push_back(outExt);
199 kernels.push_back(k);
200 }
201
202 }
203
204 MTLaunchStructForEach mtls;
205
206 if (fieldDep) {
207 for (size_t ct=0; ct < ins.size(); ct++) {
208 Script *s = kernels[ct]->mScript;
209 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
210 uint32_t slot = kernels[ct]->mSlot;
211
212 uint32_t inLen;
213 const Allocation **ains;
214
215 if (ins[ct] == nullptr) {
216 inLen = 0;
217 ains = nullptr;
218
219 } else {
220 inLen = 1;
221 ains = const_cast<const Allocation**>(&ins[ct]);
222 }
223
224 bool launchOK = si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls);
225
226 si->forEachKernelSetup(slot, &mtls);
227 si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
228 mtls.fep.usrLen, nullptr);
229
230 if (launchOK) {
231 mCtx->launchForEach(ains, inLen, outs[ct], nullptr, &mtls);
232 }
233
234 si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr);
235 }
236 } else {
237 ScriptList sl;
238 sl.ins = ins.data();
239 sl.outs = outs.data();
240 sl.kernels = kernels.data();
241 sl.count = kernels.size();
242
243 uint32_t inLen;
244 const Allocation **ains;
245
246 if (ins[0] == nullptr) {
247 inLen = 0;
248 ains = nullptr;
249
250 } else {
251 inLen = 1;
252 ains = const_cast<const Allocation**>(&ins[0]);
253 }
254
255 std::vector<const void *> usrPtrs;
256 std::vector<const void *> fnPtrs;
257 std::vector<uint32_t> sigs;
258 for (size_t ct=0; ct < kernels.size(); ct++) {
259 Script *s = kernels[ct]->mScript;
260 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
261
262 si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
263 fnPtrs.push_back((void *)mtls.kernel);
264 usrPtrs.push_back(mtls.fep.usr);
265 sigs.push_back(mtls.fep.usrLen);
266 si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
267 mtls.fep.usr, mtls.fep.usrLen, nullptr);
268 }
269 sl.sigs = sigs.data();
270 sl.usrPtrs = usrPtrs.data();
271 sl.fnPtrs = fnPtrs.data();
272 sl.inExts = inExts.data();
273 sl.outExts = outExts.data();
274
275 Script *s = kernels[0]->mScript;
276 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
277
278 if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) {
279
280 mtls.script = nullptr;
281 mtls.kernel = &scriptGroupRoot;
282 mtls.fep.usr = &sl;
283
284 mCtx->launchForEach(ains, inLen, outs[0], nullptr, &mtls);
285 }
286
287 for (size_t ct=0; ct < kernels.size(); ct++) {
288 Script *s = kernels[ct]->mScript;
289 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
290 si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0,
291 nullptr);
292 }
293 }
294 }
295
296 } // namespace renderscript
297 } // namespace android
298