1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20 #include "rsCpuBLASDispatch.h"
21 #include "eight_bit_int_gemm.h"
22
23 namespace android {
24 namespace renderscript {
25
26
27 class RsdCpuScriptIntrinsicBLAS : public RsdCpuScriptIntrinsic {
28 public:
29 void invokeForEach(uint32_t slot,
30 const Allocation ** ain,
31 uint32_t inLen,
32 Allocation * aout,
33 const void * usr,
34 uint32_t usrLen,
35 const RsScriptCall *sc) override;
36 void populateScript(Script *) override;
37 ~RsdCpuScriptIntrinsicBLAS() override;
38 RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx, const Script *s);
39
40 protected:
41
42 uint8_t a_offset = 0;
43 uint8_t b_offset = 0;
44 uint8_t c_offset = 0;
45
46 #ifdef RS_COMPATIBILITY_LIB
47 bool isBlasLibInitialized = false;
48 #endif
49 static void kernelBNNM(size_t m, size_t n, size_t k,
50 const uint8_t* a, uint8_t a_offset, size_t lda,
51 const uint8_t* b, uint8_t b_offset, size_t ldb,
52 uint8_t* c, int32_t c_offset, size_t ldc,
53 int32_t c_mult_int);
54
55
56
57 };
58
populateScript(Script * s)59 void RsdCpuScriptIntrinsicBLAS::populateScript(Script *s) {
60 s->mHal.info.exportedVariableCount = 0;
61 }
62
initABC(const Allocation ** ain,size_t size,void ** A,void ** B,void ** C,int * lda,int * ldb,int * ldc)63 static void initABC(const Allocation ** ain,
64 size_t size,
65 void** A,
66 void** B,
67 void** C,
68 int* lda,
69 int* ldb,
70 int* ldc)
71 {
72 if (ain[0]) {
73 *A = ain[0]->mHal.drvState.lod[0].mallocPtr;
74 *lda = (int)(ain[0]->mHal.drvState.lod[0].stride/size);
75 }
76 if (ain[1]) {
77 *B = ain[1]->mHal.drvState.lod[0].mallocPtr;
78 *ldb = (int)(ain[1]->mHal.drvState.lod[0].stride/size);
79 }
80 if (ain[2]) {
81 *C = ain[2]->mHal.drvState.lod[0].mallocPtr;
82 *ldc = (int)(ain[2]->mHal.drvState.lod[0].stride/size);
83 }
84 }
85
86 // Routine to setup LaunchStruct for GEMM callback.
setupGEMM(MTLaunchStructForEachBlas * mtls,const Allocation ** ain,RsBlasCall * call,RsdCpuReferenceImpl * ctx)87 static void setupGEMM(MTLaunchStructForEachBlas *mtls, const Allocation **ain, RsBlasCall* call,
88 RsdCpuReferenceImpl *ctx) {
89 uint32_t mm, nn, kk;
90 mm = call->M;
91 nn = call->N;
92 kk = call->K;
93
94 memset(mtls, 0, sizeof(MTLaunchStructForEachBlas));
95 mtls->rs = ctx;
96 mtls->sc = call;
97 mtls->dimPtr = &mtls->fep.dim;
98 mtls->fep.dim.x = nn;
99 mtls->fep.dim.y = mm;
100 mtls->fep.dim.z = kk;
101 if (ain) {
102 memcpy(mtls->ains, ain, 3 * sizeof(ain[0]));
103 }
104 uint32_t elementBytes = 4;
105 if (ain[0]) {
106 elementBytes = ain[0]->getType()->getElement()->getSizeBytes();
107 }
108 const uint32_t MIN_SIZE_TO_TILE = 64 * 1024 / elementBytes;
109 const uint32_t MAX_WORK_PER_THREAD = 512 / elementBytes;
110 const uint32_t THREAD_COUNT = ctx->getThreadCount();
111 uint32_t tileSizeN = 0;
112 uint32_t tileSizeM = 0;
113
114 // Do not tile the matrix if:
115 // 1. It is too small comparing to the other matrix.
116 // 2. It is too small comparing to MIN_SIZE_TO_TILE .
117 if (nn * kk > MIN_SIZE_TO_TILE && nn * THREAD_COUNT > mm) {
118 tileSizeN = rsMin(nn / THREAD_COUNT, MAX_WORK_PER_THREAD);
119 }
120 if (mm * kk > MIN_SIZE_TO_TILE && mm * THREAD_COUNT > nn) {
121 tileSizeM = rsMin(mm / THREAD_COUNT, MAX_WORK_PER_THREAD);
122 }
123 mtls->numTileM = 1;
124 mtls->numTileN = 1;
125 mtls->tileSizeM = mm;
126 mtls->tileSizeN = nn;
127
128 // If tiling is needed, compute the number of slices for A & B.
129 mtls->isThreadable = (tileSizeM > 0 || tileSizeN > 0);
130 if (tileSizeM) {
131 mtls->numTileM += (mm - 1) / tileSizeM;
132 mtls->tileSizeM = tileSizeM;
133 }
134 if (tileSizeN) {
135 mtls->numTileN += (nn - 1) / tileSizeN;
136 mtls->tileSizeN = tileSizeN;
137 }
138
139 mtls->mSliceNum = 0;
140 }
141
142 // Generic GEMM callback routine.
143 template <typename T_data, typename T_param, typename Func>
walk_tiled_gemm(Func blasFunc,T_param alpha,T_param beta,int vecSize,RsBlasCall * call,MTLaunchStructForEachBlas * mtls)144 static void walk_tiled_gemm(Func blasFunc, T_param alpha, T_param beta, int vecSize,
145 RsBlasCall* call, MTLaunchStructForEachBlas *mtls) {
146 // setup BLAS enum args
147 enum CBLAS_TRANSPOSE TransA = (enum CBLAS_TRANSPOSE)call->transA;
148 enum CBLAS_TRANSPOSE TransB = (enum CBLAS_TRANSPOSE)call->transB;
149
150 void *A = nullptr;
151 void *B = nullptr;
152 void *C = nullptr;
153
154 int lda = 0, ldb = 0, ldc = 0;
155
156 const Allocation *ain[RS_KERNEL_INPUT_LIMIT];
157 ain[0] = mtls->ains[0];
158 ain[1] = mtls->ains[1];
159 ain[2] = mtls->ains[2];
160
161 initABC(ain, sizeof(T_data) * vecSize, &A, &B, &C, &lda, &ldb, &ldc);
162
163 // Determin the stride of the tiled matrices.
164 int mStride = (TransA == CblasNoTrans) ? lda : 1;
165 int nStride = (TransB == CblasNoTrans) ? 1 : ldb;
166 while (1) {
167 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
168
169 uint32_t mStart = (slice % mtls->numTileM) * mtls->tileSizeM;
170 uint32_t mEnd = mStart + mtls->tileSizeM;
171 mEnd = rsMin(mEnd, (uint32_t)call->M);
172 if (mEnd <= mStart) {
173 return;
174 }
175
176 uint32_t nStart = (slice / mtls->numTileM) * mtls->tileSizeN;
177 uint32_t nEnd = nStart + mtls->tileSizeN;
178 nEnd = rsMin(nEnd, (uint32_t)call->N);
179 if (nEnd <= nStart) {
180 return;
181 }
182
183 blasFunc(CblasRowMajor, TransA, TransB,
184 mEnd - mStart, nEnd - nStart, call->K, alpha,
185 (T_data *)A + mStart * mStride * vecSize, lda,
186 (T_data *)B + nStart * nStride * vecSize, ldb, beta,
187 (T_data *)C + (mStart * ldc + nStart) * vecSize, ldc);
188 }
189 }
190
191 // SGEMM callback
walk_2d_sgemm(void * usr,uint32_t idx)192 static void walk_2d_sgemm(void *usr, uint32_t idx) {
193 MTLaunchStructForEachBlas *mtls = (MTLaunchStructForEachBlas *)usr;
194 RsBlasCall* call = (RsBlasCall*) mtls->sc;
195
196 float alpha = call->alpha.f;
197 float beta = call->beta.f;
198
199 walk_tiled_gemm<float, float, FnPtr_cblas_sgemm>(cblas_sgemm, alpha, beta, 1, call, mtls);
200 }
201
202 // DGEMM callback
walk_2d_dgemm(void * usr,uint32_t idx)203 static void walk_2d_dgemm(void *usr, uint32_t idx) {
204 MTLaunchStructForEachBlas *mtls = (MTLaunchStructForEachBlas *)usr;
205 RsBlasCall* call = (RsBlasCall*) mtls->sc;
206
207 double alpha = call->alpha.d;
208 double beta = call->beta.d;
209
210 walk_tiled_gemm<double, double, FnPtr_cblas_dgemm>(cblas_dgemm, alpha, beta, 1, call, mtls);
211 }
212
213 // CGEMM callback
walk_2d_cgemm(void * usr,uint32_t idx)214 static void walk_2d_cgemm(void *usr, uint32_t idx) {
215 MTLaunchStructForEachBlas *mtls = (MTLaunchStructForEachBlas *)usr;
216 RsBlasCall* call = (RsBlasCall*) mtls->sc;
217
218 void * alpha = (void *)&call->alpha.c;
219 void * beta = (void *)&call->beta.c;
220
221 walk_tiled_gemm<float, void *, FnPtr_cblas_cgemm>(cblas_cgemm, alpha, beta, 2, call, mtls);
222 }
223
224 // ZGEMM callback
walk_2d_zgemm(void * usr,uint32_t idx)225 static void walk_2d_zgemm(void *usr, uint32_t idx) {
226 MTLaunchStructForEachBlas *mtls = (MTLaunchStructForEachBlas *)usr;
227 RsBlasCall* call = (RsBlasCall*) mtls->sc;
228
229 void * alpha = (void *)&call->alpha.z;
230 void * beta = (void *)&call->beta.z;
231
232 walk_tiled_gemm<double, void *, FnPtr_cblas_zgemm>(cblas_zgemm, alpha, beta, 2, call, mtls);
233 }
234
235
invokeForEach(uint32_t slot,const Allocation ** ain,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)236 void RsdCpuScriptIntrinsicBLAS::invokeForEach(uint32_t slot,
237 const Allocation ** ain,
238 uint32_t inLen,
239 Allocation * aout,
240 const void * usr,
241 uint32_t usrLen,
242 const RsScriptCall *sc) {
243 RsBlasCall* call = (RsBlasCall*) usr;
244 // setup BLAS enum args
245 enum CBLAS_TRANSPOSE TransA = (enum CBLAS_TRANSPOSE)call->transA;
246 enum CBLAS_TRANSPOSE TransB = (enum CBLAS_TRANSPOSE)call->transB;
247 enum CBLAS_UPLO Uplo = (enum CBLAS_UPLO)call->uplo;
248 enum CBLAS_DIAG Diag = (enum CBLAS_DIAG)call->diag;
249 enum CBLAS_SIDE Side = (enum CBLAS_SIDE)call->side;
250
251 void *A = nullptr;
252 void *B = nullptr;
253 void *C = nullptr;
254 void *X = nullptr;
255 void *Y = nullptr;
256
257 int lda = 0, ldb = 0, ldc = 0;
258
259 MTLaunchStructForEachBlas mtls;
260
261 #ifdef RS_COMPATIBILITY_LIB
262 // Allow BNNM even without libblas
263 if (call->func != RsBlas_bnnm && !isBlasLibInitialized) {
264 if (!loadBLASLib()) {
265 ALOGE("Failed to load the BLAS lib, IntrinsicBLAS NOT supported!\n");
266 return;
267 }
268 isBlasLibInitialized = true;
269 }
270 #endif
271
272 switch (call->func) {
273
274 // Level 1 BLAS: returns into a 1D Allocation
275
276
277 // Level 2 BLAS
278 case (RsBlas_sgemv):
279 initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
280 cblas_sgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.f, (float*)A,
281 lda, (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
282 break;
283 case (RsBlas_sgbmv):
284 initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
285 cblas_sgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
286 call->alpha.f, (float*)A, lda, (float*)X, call->incX,
287 call->beta.f, (float*)Y, call->incY);
288 break;
289 case (RsBlas_strmv):
290 initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
291 cblas_strmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
292 lda, (float*)X, call->incX);
293 break;
294 case (RsBlas_stbmv):
295 initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
296 cblas_stbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A,
297 lda, (float*)X, call->incX);
298 break;
299 // stpmv takes a packed 1D Allocation only
300 case (RsBlas_stpmv):
301 initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
302 cblas_stpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
303 (float*)X, call->incX);
304 break;
305 case (RsBlas_strsv):
306 initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
307 cblas_strsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, lda,
308 (float*)X, call->incX);
309 break;
310 case (RsBlas_stbsv):
311 initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
312 cblas_stbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A,
313 lda, (float*)X, call->incX);
314 break;
315 case (RsBlas_stpsv):
316 initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
317 cblas_stpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
318 (float*)X, call->incX);
319 break;
320 case (RsBlas_dgemv):
321 initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
322 cblas_dgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.d, (double*)A,
323 lda, (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
324 break;
325 case (RsBlas_dgbmv):
326 initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
327 cblas_dgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
328 call->alpha.d, (double*)A, lda, (double*)X, call->incX,
329 call->beta.d, (double*)Y, call->incY);
330 break;
331 case (RsBlas_dtrmv):
332 initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
333 cblas_dtrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
334 lda, (double*)X, call->incX);
335 break;
336 case (RsBlas_dtbmv):
337 initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
338 cblas_dtbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A,
339 lda, (double*)X, call->incX);
340 break;
341 // stpmv takes a packed 1D Allocation only
342 case (RsBlas_dtpmv):
343 initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
344 cblas_dtpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
345 (double*)X, call->incX);
346 break;
347 case (RsBlas_dtrsv):
348 initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
349 cblas_dtrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, lda,
350 (double*)X, call->incX);
351 break;
352 case (RsBlas_dtbsv):
353 initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
354 cblas_dtbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A,
355 lda, (double*)X, call->incX);
356 break;
357 case (RsBlas_dtpsv):
358 initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
359 cblas_dtpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
360 (double*)X, call->incX);
361 break;
362 case (RsBlas_cgemv):
363 initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
364 cblas_cgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.c, (void*)A,
365 lda, (void*)X, call->incX, (void*)&call->beta.c, (void*)Y, call->incY);
366 break;
367 case (RsBlas_cgbmv):
368 initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
369 cblas_cgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
370 (void*)&call->alpha.c, (void*)A, lda, (void*)X, call->incX,
371 (void*)&call->beta.c, (void*)Y, call->incY);
372 break;
373 case (RsBlas_ctrmv):
374 initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
375 cblas_ctrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
376 lda, (void*)X, call->incX);
377 break;
378 case (RsBlas_ctbmv):
379 initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
380 cblas_ctbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
381 lda, (void*)X, call->incX);
382 break;
383 // stpmv takes a packed 1D Allocation only
384 case (RsBlas_ctpmv):
385 initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
386 cblas_ctpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
387 (void*)X, call->incX);
388 break;
389 case (RsBlas_ctrsv):
390 initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
391 cblas_ctrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda,
392 (void*)X, call->incX);
393 break;
394 case (RsBlas_ctbsv):
395 initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
396 cblas_ctbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
397 lda, (void*)X, call->incX);
398 break;
399 case (RsBlas_ctpsv):
400 initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
401 cblas_ctpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
402 (void*)X, call->incX);
403 break;
404 case (RsBlas_zgemv):
405 initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
406 cblas_zgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.z, (void*)A,
407 lda, (void*)X, call->incX, (void*)&call->beta.z, (void*)Y, call->incY);
408 break;
409 case (RsBlas_zgbmv):
410 initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
411 cblas_zgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
412 (void*)&call->alpha.z, (void*)A, lda, (void*)X, call->incX,
413 (void*)&call->beta.z, (void*)Y, call->incY);
414 break;
415 case (RsBlas_ztrmv):
416 initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
417 cblas_ztrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
418 lda, (void*)X, call->incX);
419 break;
420 case (RsBlas_ztbmv):
421 initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
422 cblas_ztbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
423 lda, (void*)X, call->incX);
424 break;
425 // stpmv takes a packed 1D Allocation only
426 case (RsBlas_ztpmv):
427 initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
428 cblas_ztpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
429 (void*)X, call->incX);
430 break;
431 case (RsBlas_ztrsv):
432 initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
433 cblas_ztrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda,
434 (void*)X, call->incX);
435 break;
436 case (RsBlas_ztbsv):
437 initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
438 cblas_ztbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
439 lda, (void*)X, call->incX);
440 break;
441 case (RsBlas_ztpsv):
442 initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
443 cblas_ztpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
444 (void*)X, call->incX);
445 break;
446
447
448 // S and D only
449 case (RsBlas_ssymv):
450 initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
451 cblas_ssymv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A, lda,
452 (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
453 break;
454 case (RsBlas_ssbmv):
455 initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
456 cblas_ssbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.f,
457 (float*)A, lda, (float*)X, call->incX, call->beta.f,
458 (float*)Y, call->incY);
459 break;
460 //sspmv requires a packed 1D Allocation
461 case (RsBlas_sspmv):
462 initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
463 cblas_sspmv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A,
464 (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
465 break;
466 // following calls have init reordered because A is output matrix
467 case (RsBlas_sger):
468 initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
469 cblas_sger(CblasRowMajor, call->M, call->N, call->alpha.f, (float*)X,
470 call->incX, (float*)Y, call->incY, (float*)A, lda);
471 break;
472 case (RsBlas_ssyr):
473 initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr);
474 cblas_ssyr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
475 (float*)A, lda);
476 break;
477 // sspr is packed 1D Allocation A only
478 case (RsBlas_sspr):
479 initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr);
480 cblas_sspr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
481 (float*)A);
482 break;
483 case (RsBlas_ssyr2):
484 initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
485 cblas_ssyr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
486 (float*)Y, call->incY, (float*)A, lda);
487 break;
488 // sspr2 is packed 1D Allocation A only
489 case (RsBlas_sspr2):
490 initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
491 cblas_sspr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
492 (float*)Y, call->incY, (float*)A);
493 break;
494 case (RsBlas_dsymv):
495 initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
496 cblas_dsymv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A, lda,
497 (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
498 break;
499 case (RsBlas_dsbmv):
500 initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
501 cblas_dsbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.d,
502 (double*)A, lda, (double*)X, call->incX, call->beta.d,
503 (double*)Y, call->incY);
504 break;
505 // dspmv requires a packed 1D Allocation
506 case (RsBlas_dspmv):
507 initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
508 cblas_dspmv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A,
509 (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
510 break;
511 // following calls have init reordered because A is output matrix
512 case (RsBlas_dger):
513 initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
514 cblas_dger(CblasRowMajor, call->M, call->N, call->alpha.d, (double*)X,
515 call->incX, (double*)Y, call->incY, (double*)A, lda);
516 break;
517 case (RsBlas_dsyr):
518 initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr);
519 cblas_dsyr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
520 (double*)A, lda);
521 break;
522 // dspr is packed 1D Allocation A only
523 case (RsBlas_dspr):
524 initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr);
525 cblas_dspr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
526 (double*)A);
527 break;
528 case (RsBlas_dsyr2):
529 initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
530 cblas_dsyr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
531 (double*)Y, call->incY, (double*)A, lda);
532 break;
533 // dspr2 is packed 1D Allocation A only
534 case (RsBlas_dspr2):
535 initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
536 cblas_dspr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
537 (double*)Y, call->incY, (double*)A);
538 break;
539
540 // C and Z only
541 case (RsBlas_chemv):
542 initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
543 cblas_chemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A, lda,
544 X, call->incX, (void*)&call->beta.c, Y, call->incY);
545 break;
546 case (RsBlas_chbmv):
547 initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
548 cblas_chbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.c,
549 A, lda, X, call->incX, (void*)&call->beta.c, Y, call->incY);
550 break;
551 case (RsBlas_chpmv):
552 initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
553 cblas_chpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A,
554 X, call->incX, (void*)&call->beta.c, Y, call->incY);
555 break;
556 case (RsBlas_cgeru):
557 initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
558 cblas_cgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c,
559 X, call->incX, Y, call->incY, A, lda);
560 break;
561 case (RsBlas_cgerc):
562 initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
563 cblas_cgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c,
564 X, call->incX, Y, call->incY, A, lda);
565 break;
566 case (RsBlas_cher):
567 initABC(ain, sizeof(float)*2, &X, nullptr, &A, &ldb, nullptr, &lda);
568 cblas_cher(CblasRowMajor, Uplo, call->N, call->alpha.f,
569 X, call->incX, A, lda);
570 break;
571 // packed 1D Allocations only
572 case (RsBlas_chpr):
573 initABC(ain, sizeof(float)*2, &X, nullptr, &A, &ldb, nullptr, &lda);
574 cblas_chpr(CblasRowMajor, Uplo, call->N, call->alpha.f, X,
575 call->incX, A);
576 break;
577 case (RsBlas_cher2):
578 initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
579 cblas_cher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c,
580 X, call->incX, Y, call->incY, A, lda);
581 break;
582 // packed 1D Allocations only
583 case (RsBlas_chpr2):
584 initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
585 cblas_chpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, X,
586 call->incX, Y, call->incY, A);
587 break;
588 case (RsBlas_zhemv):
589 initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
590 cblas_zhemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A, lda,
591 X, call->incX, (void*)&call->beta.z, Y, call->incY);
592 break;
593 case (RsBlas_zhbmv):
594 initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
595 cblas_zhbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.z,
596 A, lda, X, call->incX, (void*)&call->beta.z, Y, call->incY);
597 break;
598 case (RsBlas_zhpmv):
599 initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
600 cblas_zhpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A,
601 X, call->incX, (void*)&call->beta.z, Y, call->incY);
602 break;
603 case (RsBlas_zgeru):
604 initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
605 cblas_zgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z,
606 X, call->incX, Y, call->incY, A, lda);
607 break;
608 case (RsBlas_zgerc):
609 initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
610 cblas_zgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z,
611 X, call->incX, Y, call->incY, A, lda);
612 break;
613 case (RsBlas_zher):
614 initABC(ain, sizeof(double)*2, &X, nullptr, &A, &ldb, nullptr, &lda);
615 cblas_zher(CblasRowMajor, Uplo, call->N, call->alpha.d,
616 X, call->incX, A, lda);
617 break;
618 // packed 1D Allocations only
619 case (RsBlas_zhpr):
620 initABC(ain, sizeof(double)*2, &X, nullptr, &A, &ldb, nullptr, &lda);
621 cblas_zhpr(CblasRowMajor, Uplo, call->N, call->alpha.d, X,
622 call->incX, A);
623 break;
624 case (RsBlas_zher2):
625 initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
626 cblas_zher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z,
627 X, call->incX, Y, call->incY, A, lda);
628 break;
629 // packed 1D Allocations only
630 case (RsBlas_zhpr2):
631 initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
632 cblas_zhpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, X,
633 call->incX, Y, call->incY, A);
634 break;
635
636 // Level 3 BLAS
637 case (RsBlas_sgemm):
638 setupGEMM(&mtls, ain, call, mCtx);
639 if (mtls.isThreadable) {
640 mCtx->launchThreads(walk_2d_sgemm, &mtls);
641 } else {
642 initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
643 cblas_sgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.f,
644 (float*)A, lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
645 }
646 break;
647 case (RsBlas_ssymm):
648 initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
649 cblas_ssymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.f, (float*)A,
650 lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
651 break;
652 case (RsBlas_ssyrk):
653 initABC(ain, sizeof(float), &A, nullptr, &C, &lda, nullptr, &ldc);
654 cblas_ssyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A,
655 lda, call->beta.f, (float*)C, ldc);
656 break;
657 case (RsBlas_ssyr2k):
658 initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
659 cblas_ssyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A,
660 lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
661 break;
662 case (RsBlas_strmm):
663 initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr);
664 cblas_strmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f,
665 (float*)A, lda, (float*)B, ldb);
666 break;
667 case (RsBlas_strsm):
668 initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr);
669 cblas_strsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f,
670 (float*)A, lda, (float*)B, ldb);
671 break;
672
673
674 case (RsBlas_dgemm):
675 setupGEMM(&mtls, ain, call, mCtx);
676 if (mtls.isThreadable) {
677 mCtx->launchThreads(walk_2d_dgemm, &mtls);
678 } else {
679 initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
680 cblas_dgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.d,
681 (double*)A, lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
682 }
683 break;
684 case (RsBlas_dsymm):
685 initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
686 cblas_dsymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.d, (double*)A,
687 lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
688 break;
689 case (RsBlas_dsyrk):
690 initABC(ain, sizeof(double), &A, nullptr, &C, &lda, nullptr, &ldc);
691 cblas_dsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A,
692 lda, call->beta.d, (double*)C, ldc);
693 break;
694 case (RsBlas_dsyr2k):
695 initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
696 cblas_dsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A,
697 lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
698 break;
699 case (RsBlas_dtrmm):
700 initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr);
701 cblas_dtrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d,
702 (double*)A, lda, (double*)B, ldb);
703 break;
704 case (RsBlas_dtrsm):
705 initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr);
706 cblas_dtrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d,
707 (double*)A, lda, (double*)B, ldb);
708 break;
709
710 case (RsBlas_cgemm):
711 setupGEMM(&mtls, ain, call, mCtx);
712 if (mtls.isThreadable) {
713 mCtx->launchThreads(walk_2d_cgemm, &mtls);
714 } else {
715 initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
716 cblas_cgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.c,
717 A, lda, B, ldb, (void*)&call->beta.c, C, ldc);
718 }
719 break;
720 case (RsBlas_csymm):
721 initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
722 cblas_csymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A,
723 lda, B, ldb, (void*)&call->beta.c, C, ldc);
724 break;
725 case (RsBlas_csyrk):
726 initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
727 cblas_csyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A,
728 lda, (void*)&call->beta.c, C, ldc);
729 break;
730 case (RsBlas_csyr2k):
731 initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
732 cblas_csyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A,
733 lda, B, ldb, (void*)&call->beta.c, C, ldc);
734 break;
735 case (RsBlas_ctrmm):
736 initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
737 cblas_ctrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c,
738 A, lda, B, ldb);
739 break;
740 case (RsBlas_ctrsm):
741 initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
742 cblas_ctrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c,
743 A, lda, B, ldb);
744 break;
745
746 case (RsBlas_zgemm):
747 setupGEMM(&mtls, ain, call, mCtx);
748 if (mtls.isThreadable) {
749 mCtx->launchThreads(walk_2d_zgemm, &mtls);
750 } else {
751 initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
752 cblas_zgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.z,
753 A, lda, B, ldb, (void*)&call->beta.z, C, ldc);
754 }
755 break;
756 case (RsBlas_zsymm):
757 initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
758 cblas_zsymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A,
759 lda, B, ldb, (void*)&call->beta.z, C, ldc);
760 break;
761 case (RsBlas_zsyrk):
762 initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
763 cblas_zsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A,
764 lda, (void*)&call->beta.z, C, ldc);
765 break;
766 case (RsBlas_zsyr2k):
767 initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
768 cblas_zsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A,
769 lda, B, ldb, (void*)&call->beta.z, C, ldc);
770 break;
771 case (RsBlas_ztrmm):
772 initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
773 cblas_ztrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z,
774 A, lda, B, ldb);
775 break;
776 case (RsBlas_ztrsm):
777 initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
778 cblas_ztrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z,
779 A, lda, B, ldb);
780 break;
781
782 // Level 3 C and Z only
783 case (RsBlas_chemm):
784 initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
785 cblas_chemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A, lda,
786 B, ldb, (void*)&call->beta.c, C, ldc);
787 break;
788 case (RsBlas_cherk):
789 initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
790 cblas_cherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, A, lda,
791 call->beta.f, C, ldc);
792 break;
793 case (RsBlas_cher2k):
794 initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
795 cblas_cher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A, lda,
796 B, ldb, call->beta.f, C, ldc);
797 break;
798
799 case (RsBlas_zhemm):
800 initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
801 cblas_zhemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A, lda,
802 B, ldb, (void*)&call->beta.z, C, ldc);
803 break;
804 case (RsBlas_zherk):
805 initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
806 cblas_zherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, A, lda,
807 call->beta.d, C, ldc);
808 break;
809 case (RsBlas_zher2k):
810 initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
811 cblas_zher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A, lda,
812 B, ldb, call->beta.d, C, ldc);
813 break;
814
815
816 case (RsBlas_bnnm):
817 initABC(ain, sizeof(uint8_t), &A, &B, &C, &lda, &ldb, &ldc);
818 kernelBNNM(call->M, call->N, call->K,
819 (const uint8_t*)A, call->a_offset, lda,
820 (const uint8_t*)B, call->b_offset, ldb,
821 (uint8_t*)C, call->c_offset, ldc,
822 call->c_mult_int);
823
824 break;
825
826 default:
827 ALOGE("unimplemented\n");
828 }
829
830
831 }
832
kernelBNNM(size_t m,size_t n,size_t k,const uint8_t * a,uint8_t a_offset,size_t lda,const uint8_t * b,uint8_t b_offset,size_t ldb,uint8_t * c,int32_t c_offset,size_t ldc,int32_t c_mult_int)833 void RsdCpuScriptIntrinsicBLAS::kernelBNNM(size_t m, size_t n, size_t k,
834 const uint8_t* a, uint8_t a_offset, size_t lda,
835 const uint8_t* b, uint8_t b_offset, size_t ldb,
836 uint8_t* c, int32_t c_offset, size_t ldc,
837 int32_t c_mult_int) {
838 const int c_shift = 21;
839 #if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_ARM_USE_INTRINSICS)
840 // Non-optimized path for ARMv7 devices without SIMD instructions.
841 if (!gArchUseSIMD) {
842 /*
843 * Calculations are done in 1.10.21 fixed-point format for the final output,
844 * just before there's a shift down to drop the fractional parts. The output
845 * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
846 * gives some headroom to avoid wrapping around on small overflows.
847 */
848 size_t i = 0, j = 0, l = 0;
849 for (j = 0; j < n; j++) {
850 for (i = 0; i < m; i++) {
851 int32_t total = 0;
852 for (l = 0; l < k; l++) {
853 const int a_index = ((i * lda) + l);
854 const uint8_t a_as_byte = a[a_index];
855 const int32_t a_as_int = (((int32_t)(a_as_byte)) - a_offset);
856 const int b_index = ((j * ldb) + l);
857 const uint8_t b_as_byte = b[b_index];
858 const int32_t b_as_int = (((int32_t)(b_as_byte)) - b_offset);
859 const int32_t mult_as_int = (a_as_int * b_as_int);
860 total += mult_as_int;
861 }
862 const int c_index = ((ldc * i) + j);
863 int32_t output =
864 ((((total + c_offset) * c_mult_int) + (1 << (c_shift - 1)))
865 >> c_shift);
866 if (output > 255) {
867 output = 255;
868 }
869 if (output < 0) {
870 output = 0;
871 }
872 c[c_index] = (uint8_t)(output);
873 }
874 }
875 return;
876 }
877 #endif
878
879 // Using gemmlowp to calculate the low precision 8 bit GEMM.
880 // Set MaxNumThreads to 0. The value 0 lets the implementation query
881 // the system to determine the number of hardware threads
882 gemmlowp::eight_bit_int_gemm::SetMaxNumThreads(0);
883
884 bool transpose_a = true;
885 bool transpose_b = false;
886 bool transpose_c = true;
887 gemmlowp::eight_bit_int_gemm::EightBitIntGemm(transpose_a, transpose_b, transpose_c,
888 m, n, k, a, -a_offset, lda,
889 b, -b_offset, ldb, c, c_offset,
890 c_mult_int, c_shift, ldc,
891 gemmlowp::eight_bit_int_gemm::BitDepthSetting::A8B8);
892
893 }
894
895
896
897
898
RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl * ctx,const Script * s)899 RsdCpuScriptIntrinsicBLAS::RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx,
900 const Script *s)
901 : RsdCpuScriptIntrinsic(ctx, s, nullptr, RS_SCRIPT_INTRINSIC_ID_BLAS) {
902
903
904 }
905
~RsdCpuScriptIntrinsicBLAS()906 RsdCpuScriptIntrinsicBLAS::~RsdCpuScriptIntrinsicBLAS() {
907 }
908
rsdIntrinsic_BLAS(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)909 RsdCpuScriptImpl * rsdIntrinsic_BLAS(RsdCpuReferenceImpl *ctx,
910 const Script *s, const Element *e) {
911
912 return new RsdCpuScriptIntrinsicBLAS(ctx, s);
913 }
914
915 } // namespace renderscript
916 } // namespace android
917