1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package androidx.renderscript;
18 
19 import android.support.annotation.IntDef;
20 import java.lang.annotation.Retention;
21 import java.lang.annotation.RetentionPolicy;
22 
23 /**
24  *
25  * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS.
26  *
27  * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
28  * building blocks for performing basic vector and matrix operations.
29  *
30  * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
31  *
32  **/
33 public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
    // NOTE(review): not referenced anywhere in the visible part of this class;
    // confirm whether it is still needed.
    private Allocation mLUT;
    // create() sets the IncSupp flag only when Build.VERSION.SDK_INT is below this
    // value (and rs.isUseNative() is true).
    private static final int INTRINSIC_API_LEVEL = 23;
36 
    /**
     * Private constructor; {@link #create} is the method in this class that calls it.
     *
     * @param id handle forwarded unchanged to the superclass constructor
     * @param rs RenderScript context forwarded unchanged to the superclass constructor
     */
    private ScriptIntrinsicBLAS(long id, RenderScript rs) {
        super(id, rs);
    }
40 
    // Function selectors. Each public BLAS method in this class passes the matching
    // selector as the second argument of the mRS.nScriptIntrinsicBLAS_* call
    // (e.g. SGEMV passes RsBlas_sgemv).
    // NOTE(review): the numeric values presumably have to stay in sync with the
    // native side - confirm before renumbering.
    private static final int RsBlas_sdsdot = 1;
    private static final int RsBlas_dsdot = 2;
    private static final int RsBlas_sdot = 3;
    private static final int RsBlas_ddot = 4;
    private static final int RsBlas_cdotu_sub = 5;
    private static final int RsBlas_cdotc_sub = 6;
    private static final int RsBlas_zdotu_sub = 7;
    private static final int RsBlas_zdotc_sub = 8;
    private static final int RsBlas_snrm2 = 9;
    private static final int RsBlas_sasum = 10;
    private static final int RsBlas_dnrm2 = 11;
    private static final int RsBlas_dasum = 12;
    private static final int RsBlas_scnrm2 = 13;
    private static final int RsBlas_scasum = 14;
    private static final int RsBlas_dznrm2 = 15;
    private static final int RsBlas_dzasum = 16;
    private static final int RsBlas_isamax = 17;
    private static final int RsBlas_idamax = 18;
    private static final int RsBlas_icamax = 19;
    private static final int RsBlas_izamax = 20;
    private static final int RsBlas_sswap = 21;
    private static final int RsBlas_scopy = 22;
    private static final int RsBlas_saxpy = 23;
    private static final int RsBlas_dswap = 24;
    private static final int RsBlas_dcopy = 25;
    private static final int RsBlas_daxpy = 26;
    private static final int RsBlas_cswap = 27;
    private static final int RsBlas_ccopy = 28;
    private static final int RsBlas_caxpy = 29;
    private static final int RsBlas_zswap = 30;
    private static final int RsBlas_zcopy = 31;
    private static final int RsBlas_zaxpy = 32;
    private static final int RsBlas_srotg = 33;
    private static final int RsBlas_srotmg = 34;
    private static final int RsBlas_srot = 35;
    private static final int RsBlas_srotm = 36;
    private static final int RsBlas_drotg = 37;
    private static final int RsBlas_drotmg = 38;
    private static final int RsBlas_drot = 39;
    private static final int RsBlas_drotm = 40;
    private static final int RsBlas_sscal = 41;
    private static final int RsBlas_dscal = 42;
    private static final int RsBlas_cscal = 43;
    private static final int RsBlas_zscal = 44;
    private static final int RsBlas_csscal = 45;
    private static final int RsBlas_zdscal = 46;
    private static final int RsBlas_sgemv = 47;
    private static final int RsBlas_sgbmv = 48;
    private static final int RsBlas_strmv = 49;
    private static final int RsBlas_stbmv = 50;
    private static final int RsBlas_stpmv = 51;
    private static final int RsBlas_strsv = 52;
    private static final int RsBlas_stbsv = 53;
    private static final int RsBlas_stpsv = 54;
    private static final int RsBlas_dgemv = 55;
    private static final int RsBlas_dgbmv = 56;
    private static final int RsBlas_dtrmv = 57;
    private static final int RsBlas_dtbmv = 58;
    private static final int RsBlas_dtpmv = 59;
    private static final int RsBlas_dtrsv = 60;
    private static final int RsBlas_dtbsv = 61;
    private static final int RsBlas_dtpsv = 62;
    private static final int RsBlas_cgemv = 63;
    private static final int RsBlas_cgbmv = 64;
    private static final int RsBlas_ctrmv = 65;
    private static final int RsBlas_ctbmv = 66;
    private static final int RsBlas_ctpmv = 67;
    private static final int RsBlas_ctrsv = 68;
    private static final int RsBlas_ctbsv = 69;
    private static final int RsBlas_ctpsv = 70;
    private static final int RsBlas_zgemv = 71;
    private static final int RsBlas_zgbmv = 72;
    private static final int RsBlas_ztrmv = 73;
    private static final int RsBlas_ztbmv = 74;
    private static final int RsBlas_ztpmv = 75;
    private static final int RsBlas_ztrsv = 76;
    private static final int RsBlas_ztbsv = 77;
    private static final int RsBlas_ztpsv = 78;
    private static final int RsBlas_ssymv = 79;
    private static final int RsBlas_ssbmv = 80;
    private static final int RsBlas_sspmv = 81;
    private static final int RsBlas_sger = 82;
    private static final int RsBlas_ssyr = 83;
    private static final int RsBlas_sspr = 84;
    private static final int RsBlas_ssyr2 = 85;
    private static final int RsBlas_sspr2 = 86;
    private static final int RsBlas_dsymv = 87;
    private static final int RsBlas_dsbmv = 88;
    private static final int RsBlas_dspmv = 89;
    private static final int RsBlas_dger = 90;
    private static final int RsBlas_dsyr = 91;
    private static final int RsBlas_dspr = 92;
    private static final int RsBlas_dsyr2 = 93;
    private static final int RsBlas_dspr2 = 94;
    private static final int RsBlas_chemv = 95;
    private static final int RsBlas_chbmv = 96;
    private static final int RsBlas_chpmv = 97;
    private static final int RsBlas_cgeru = 98;
    private static final int RsBlas_cgerc = 99;
    private static final int RsBlas_cher = 100;
    private static final int RsBlas_chpr = 101;
    private static final int RsBlas_cher2 = 102;
    private static final int RsBlas_chpr2 = 103;
    private static final int RsBlas_zhemv = 104;
    private static final int RsBlas_zhbmv = 105;
    private static final int RsBlas_zhpmv = 106;
    private static final int RsBlas_zgeru = 107;
    private static final int RsBlas_zgerc = 108;
    private static final int RsBlas_zher = 109;
    private static final int RsBlas_zhpr = 110;
    private static final int RsBlas_zher2 = 111;
    private static final int RsBlas_zhpr2 = 112;
    private static final int RsBlas_sgemm = 113;
    private static final int RsBlas_ssymm = 114;
    private static final int RsBlas_ssyrk = 115;
    private static final int RsBlas_ssyr2k = 116;
    private static final int RsBlas_strmm = 117;
    private static final int RsBlas_strsm = 118;
    private static final int RsBlas_dgemm = 119;
    private static final int RsBlas_dsymm = 120;
    private static final int RsBlas_dsyrk = 121;
    private static final int RsBlas_dsyr2k = 122;
    private static final int RsBlas_dtrmm = 123;
    private static final int RsBlas_dtrsm = 124;
    private static final int RsBlas_cgemm = 125;
    private static final int RsBlas_csymm = 126;
    private static final int RsBlas_csyrk = 127;
    private static final int RsBlas_csyr2k = 128;
    private static final int RsBlas_ctrmm = 129;
    private static final int RsBlas_ctrsm = 130;
    private static final int RsBlas_zgemm = 131;
    private static final int RsBlas_zsymm = 132;
    private static final int RsBlas_zsyrk = 133;
    private static final int RsBlas_zsyr2k = 134;
    private static final int RsBlas_ztrmm = 135;
    private static final int RsBlas_ztrsm = 136;
    private static final int RsBlas_chemm = 137;
    private static final int RsBlas_cherk = 138;
    private static final int RsBlas_cher2k = 139;
    private static final int RsBlas_zhemm = 140;
    private static final int RsBlas_zherk = 141;
    private static final int RsBlas_zher2k = 142;

    // BLAS extensions start here; they are numbered from 1000, leaving a gap after
    // the selectors above.
    private static final int RsBlas_bnnm = 1000;
186 
187     /**
188      * Create an intrinsic to access BLAS subroutines.
189      *
190      * @param rs The RenderScript context
191      * @return ScriptIntrinsicBLAS
192      */
create(RenderScript rs)193     public static ScriptIntrinsicBLAS create(RenderScript rs) {
194         long id;
195         boolean mUseIncSupp = rs.isUseNative() &&
196                               android.os.Build.VERSION.SDK_INT < INTRINSIC_API_LEVEL;
197 
198         id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs), mUseIncSupp);
199         ScriptIntrinsicBLAS si = new ScriptIntrinsicBLAS(id, rs);
200         si.setIncSupp(mUseIncSupp);
201         return si;
202     }
203 
    /**
     * Marks an int that is expected to be one of {@link #NO_TRANSPOSE},
     * {@link #TRANSPOSE} or {@link #CONJ_TRANSPOSE}. Source-retention only.
     *
     * @hide
     */
    @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE})
    @Retention(RetentionPolicy.SOURCE)
    public @interface Transpose {}

    /**
     * Marks an int that is expected to be {@link #UPPER} or {@link #LOWER}.
     * Source-retention only.
     *
     * @hide
     */
    @IntDef({UPPER, LOWER})
    @Retention(RetentionPolicy.SOURCE)
    public @interface Uplo {}

    /**
     * Marks an int that is expected to be {@link #NON_UNIT} or {@link #UNIT}.
     * Source-retention only.
     *
     * @hide
     */
    @IntDef({NON_UNIT, UNIT})
    @Retention(RetentionPolicy.SOURCE)
    public @interface Diag {}

    /**
     * Marks an int that is expected to be {@link #LEFT} or {@link #RIGHT}.
     * Source-retention only.
     *
     * @hide
     */
    @IntDef({LEFT, RIGHT})
    @Retention(RetentionPolicy.SOURCE)
    public @interface Side {}
231 
    // Values accepted for @Transpose arguments (checked by validateTranspose;
    // validateConjTranspose accepts only NO_TRANSPOSE and CONJ_TRANSPOSE).
    public static final int NO_TRANSPOSE = 111;
    public static final int TRANSPOSE = 112;
    public static final int CONJ_TRANSPOSE = 113;

    // Values accepted for @Uplo arguments (checked by validateUplo).
    public static final int UPPER = 121;
    public static final int LOWER = 122;

    // Values accepted for @Diag arguments (checked by validateDiag).
    public static final int NON_UNIT = 131;
    public static final int UNIT = 132;

    // Values accepted for @Side arguments (checked by validateSide).
    public static final int LEFT = 141;
    public static final int RIGHT = 142;
244 
245     static void validateSide(@Side int Side) {
246         if (Side != LEFT && Side != RIGHT) {
247             throw new RSRuntimeException("Invalid side passed to BLAS");
248         }
249     }
250 
251     static void validateTranspose(@Transpose int Trans) {
252         if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE &&
253             Trans != CONJ_TRANSPOSE) {
254             throw new RSRuntimeException("Invalid transpose passed to BLAS");
255         }
256     }
257 
258     static void validateConjTranspose(@Transpose int Trans) {
259         if (Trans != NO_TRANSPOSE &&
260             Trans != CONJ_TRANSPOSE) {
261             throw new RSRuntimeException("Invalid transpose passed to BLAS");
262         }
263     }
264 
265     static void validateDiag(@Diag int Diag) {
266         if (Diag != NON_UNIT && Diag != UNIT) {
267             throw new RSRuntimeException("Invalid diag passed to BLAS");
268         }
269     }
270 
271     static void validateUplo(@Uplo int Uplo) {
272         if (Uplo != UPPER && Uplo != LOWER) {
273             throw new RSRuntimeException("Invalid uplo passed to BLAS");
274         }
275     }
276 
277 
278     /**
279      * Level 2 BLAS
280      */
281 
282     static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
283         validateTranspose(TransA);
284         int M = A.getType().getY();
285         int N = A.getType().getX();
286         if (!A.getType().getElement().isCompatible(e) ||
287             !X.getType().getElement().isCompatible(e) ||
288             !Y.getType().getElement().isCompatible(e)) {
289             throw new RSRuntimeException("Called BLAS with wrong Element type");
290         }
291         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
292             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
293         }
294 
295         if (incX <= 0 || incY <= 0) {
296             throw new RSRuntimeException("Vector increments must be greater than 0");
297         }
298         int expectedXDim = -1, expectedYDim = -1;
299         if (TransA == NO_TRANSPOSE) {
300             expectedXDim = 1 + (N - 1) * incX;
301             expectedYDim = 1 + (M - 1) * incY;
302         } else {
303             expectedXDim = 1 + (M - 1) * incX;
304             expectedYDim = 1 + (N - 1) * incY;
305         }
306         if (X.getType().getX() != expectedXDim ||
307             Y.getType().getX() != expectedYDim) {
308             throw new RSRuntimeException("Incorrect vector dimensions for GEMV");
309         }
310     }
311 
312     /**
313      * SGEMV performs one of the matrix-vector operations
314      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
315      *
316      * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
317      *
318      * @param TransA The type of transpose applied to matrix A.
319      * @param alpha The scalar alpha.
320      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
321      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
322      * @param incX The increment for the elements of vector x, must be larger than zero.
323      * @param beta The scalar beta.
324      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
325      * @param incY The increment for the elements of vector y, must be larger than zero.
326      */
SGEMV(@ranspose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)327     public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
328         validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
329         int M = A.getType().getY();
330         int N = A.getType().getX();
331 
332         boolean mUseIncSupp = isIncSupp();
333         long aID = A.getID(mRS);
334         long xID = X.getID(mRS);
335         long yID = Y.getID(mRS);
336         if (mUseIncSupp) {
337             aID = getDummyAlloc(A);
338             xID = getDummyAlloc(X);
339             yID = getDummyAlloc(Y);
340         }
341         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
342     }
343 
344     /**
345      * DGEMV performs one of the matrix-vector operations
346      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
347      *
348      * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
349      *
350      * @param TransA The type of transpose applied to matrix A.
351      * @param alpha The scalar alpha.
352      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
353      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
354      * @param incX The increment for the elements of vector x, must be larger than zero.
355      * @param beta The scalar beta.
356      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
357      * @param incY The increment for the elements of vector y, must be larger than zero.
358      */
DGEMV(@ranspose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)359     public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
360         validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
361         int M = A.getType().getY();
362         int N = A.getType().getX();
363 
364         boolean mUseIncSupp = isIncSupp();
365         long aID = A.getID(mRS);
366         long xID = X.getID(mRS);
367         long yID = Y.getID(mRS);
368         if (mUseIncSupp) {
369             aID = getDummyAlloc(A);
370             xID = getDummyAlloc(X);
371             yID = getDummyAlloc(Y);
372         }
373         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
374     }
375 
376     /**
377      * CGEMV performs one of the matrix-vector operations
378      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
379      *
380      * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
381      *
382      * @param TransA The type of transpose applied to matrix A.
383      * @param alpha The scalar alpha.
384      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
385      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
386      * @param incX The increment for the elements of vector x, must be larger than zero.
387      * @param beta The scalar beta.
388      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
389      * @param incY The increment for the elements of vector y, must be larger than zero.
390      */
CGEMV(@ranspose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)391     public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
392         validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
393         int M = A.getType().getY();
394         int N = A.getType().getX();
395 
396         boolean mUseIncSupp = isIncSupp();
397         long aID = A.getID(mRS);
398         long xID = X.getID(mRS);
399         long yID = Y.getID(mRS);
400         if (mUseIncSupp) {
401             aID = getDummyAlloc(A);
402             xID = getDummyAlloc(X);
403             yID = getDummyAlloc(Y);
404         }
405         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
406     }
407 
408     /**
409      * ZGEMV performs one of the matrix-vector operations
410      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
411      *
412      * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
413      *
414      * @param TransA The type of transpose applied to matrix A.
415      * @param alpha The scalar alpha.
416      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
417      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
418      * @param incX The increment for the elements of vector x, must be larger than zero.
419      * @param beta The scalar beta.
420      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
421      * @param incY The increment for the elements of vector y, must be larger than zero.
422      */
ZGEMV(@ranspose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)423     public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
424         validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
425         int M = A.getType().getY();
426         int N = A.getType().getX();
427 
428         boolean mUseIncSupp = isIncSupp();
429         long aID = A.getID(mRS);
430         long xID = X.getID(mRS);
431         long yID = Y.getID(mRS);
432         if (mUseIncSupp) {
433             aID = getDummyAlloc(A);
434             xID = getDummyAlloc(X);
435             yID = getDummyAlloc(Y);
436         }
437         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
438     }
439 
440     /**
441      * SGBMV performs one of the matrix-vector operations
442      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
443      *
444      * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
445      *
446      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
447      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
448      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
449      *           for i in range(0, m):
450      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
451      *                  b[i, j-i+kl] = a[i, j]
452      *
453      * @param TransA The type of transpose applied to matrix A.
454      * @param KL The number of sub-diagonals of the matrix A.
455      * @param KU The number of super-diagonals of the matrix A.
456      * @param alpha The scalar alpha.
457      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.
458      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
459      * @param incX The increment for the elements of vector x, must be larger than zero.
460      * @param beta The scalar beta.
461      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
462      * @param incY The increment for the elements of vector y, must be larger than zero.
463      */
SGBMV(@ranspose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)464     public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
465         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
466         validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
467         if (KL < 0 || KU < 0) {
468             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
469         }
470         int M = A.getType().getY();
471         int N = A.getType().getX();
472 
473         boolean mUseIncSupp = isIncSupp();
474         long aID = A.getID(mRS);
475         long xID = X.getID(mRS);
476         long yID = Y.getID(mRS);
477         if (mUseIncSupp) {
478             aID = getDummyAlloc(A);
479             xID = getDummyAlloc(X);
480             yID = getDummyAlloc(Y);
481         }
482         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp);
483     }
484 
485     /**
486      * DGBMV performs one of the matrix-vector operations
487      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
488      *
489      * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
490      *
491      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
492      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
493      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
494      *           for i in range(0, m):
495      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
496      *                  b[i, j-i+kl] = a[i, j]
497      *
498      * @param TransA The type of transpose applied to matrix A.
499      * @param KL The number of sub-diagonals of the matrix A.
500      * @param KU The number of super-diagonals of the matrix A.
501      * @param alpha The scalar alpha.
502      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}.
503      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
504      * @param incX The increment for the elements of vector x, must be larger than zero.
505      * @param beta The scalar beta.
506      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
507      * @param incY The increment for the elements of vector y, must be larger than zero.
508      */
DGBMV(@ranspose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)509     public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
510         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
511         validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
512         if (KL < 0 || KU < 0) {
513             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
514         }
515         int M = A.getType().getY();
516         int N = A.getType().getX();
517 
518         boolean mUseIncSupp = isIncSupp();
519         long aID = A.getID(mRS);
520         long xID = X.getID(mRS);
521         long yID = Y.getID(mRS);
522         if (mUseIncSupp) {
523             aID = getDummyAlloc(A);
524             xID = getDummyAlloc(X);
525             yID = getDummyAlloc(Y);
526         }
527         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp);
528     }
529 
530     /**
531      * CGBMV performs one of the matrix-vector operations
532      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
533      *
534      * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
535      *
536      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
537      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
538      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
539      *           for i in range(0, m):
540      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
541      *                  b[i, j-i+kl] = a[i, j]
542      *
543      * @param TransA The type of transpose applied to matrix A.
544      * @param KL The number of sub-diagonals of the matrix A.
545      * @param KU The number of super-diagonals of the matrix A.
546      * @param alpha The scalar alpha.
547      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.
548      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
549      * @param incX The increment for the elements of vector x, must be larger than zero.
550      * @param beta The scalar beta.
551      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
552      * @param incY The increment for the elements of vector y, must be larger than zero.
553      */
CGBMV(@ranspose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)554     public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
555         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
556         validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
557         if (KL < 0 || KU < 0) {
558             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
559         }
560         int M = A.getType().getY();
561         int N = A.getType().getX();
562 
563         boolean mUseIncSupp = isIncSupp();
564         long aID = A.getID(mRS);
565         long xID = X.getID(mRS);
566         long yID = Y.getID(mRS);
567         if (mUseIncSupp) {
568             aID = getDummyAlloc(A);
569             xID = getDummyAlloc(X);
570             yID = getDummyAlloc(Y);
571         }
572         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp);
573     }
574 
575     /**
576      * ZGBMV performs one of the matrix-vector operations
577      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
578      *
579      * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
580      *
581      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
582      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
583      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
584      *           for i in range(0, m):
585      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
586      *                  b[i, j-i+kl] = a[i, j]
587      *
588      * @param TransA The type of transpose applied to matrix A.
589      * @param KL The number of sub-diagonals of the matrix A.
590      * @param KU The number of super-diagonals of the matrix A.
591      * @param alpha The scalar alpha.
592      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.
593      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
594      * @param incX The increment for the elements of vector x, must be larger than zero.
595      * @param beta The scalar beta.
596      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
597      * @param incY The increment for the elements of vector y, must be larger than zero.
598      */
ZGBMV(@ranspose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)599     public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
600         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
601         validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
602         if (KL < 0 || KU < 0) {
603             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
604         }
605         int M = A.getType().getY();
606         int N = A.getType().getX();
607 
608         boolean mUseIncSupp = isIncSupp();
609         long aID = A.getID(mRS);
610         long xID = X.getID(mRS);
611         long yID = Y.getID(mRS);
612         if (mUseIncSupp) {
613             aID = getDummyAlloc(A);
614             xID = getDummyAlloc(X);
615             yID = getDummyAlloc(Y);
616         }
617         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp);
618     }
619 
validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)620     static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
621         validateTranspose(TransA);
622         validateUplo(Uplo);
623         validateDiag(Diag);
624         int N = A.getType().getY();
625         if (A.getType().getX() != N) {
626             throw new RSRuntimeException("A must be a square matrix for TRMV");
627         }
628         if (!A.getType().getElement().isCompatible(e) ||
629             !X.getType().getElement().isCompatible(e)) {
630             throw new RSRuntimeException("Called BLAS with wrong Element type");
631         }
632         if (X.getType().getY() > 1) {
633             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
634         }
635 
636         if (incX <= 0) {
637             throw new RSRuntimeException("Vector increments must be greater than 0");
638         }
639         int expectedXDim = 1 + (N - 1) * incX;
640         if (X.getType().getX() != expectedXDim) {
641             throw new RSRuntimeException("Incorrect vector dimensions for TRMV");
642         }
643     }
644 
validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)645     static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
646         validateTranspose(TransA);
647         validateUplo(Uplo);
648         validateDiag(Diag);
649         if (!Ap.getType().getElement().isCompatible(e) ||
650             !X.getType().getElement().isCompatible(e)) {
651             throw new RSRuntimeException("Called BLAS with wrong Element type");
652         }
653         if (X.getType().getY() > 1) {
654             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
655         }
656 
657         if (Ap.getType().getY() > 1) {
658             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
659         }
660 
661         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
662         //is it really doing anything?
663         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
664             throw new RSRuntimeException("Invalid dimension for Ap");
665         }
666         if (incX <= 0) {
667             throw new RSRuntimeException("Vector increments must be greater than 0");
668         }
669         int expectedXDim = 1 + (N - 1) * incX;
670         if (X.getType().getX() != expectedXDim) {
671             throw new RSRuntimeException("Incorrect vector dimensions for TPMV");
672         }
673 
674         return N;
675     }
676 
677     /**
678      * STRMV performs one of the matrix-vector operations
679      * x := A*x   or   x := A**T*x
680      *
681      * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
682      *
683      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
684      * @param TransA The type of transpose applied to matrix A.
685      * @param Diag Specifies whether or not A is unit triangular.
686      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
687      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
688      * @param incX The increment for the elements of vector x, must be larger than zero.
689      */
STRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)690     public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
691         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
692         int N = A.getType().getY();
693 
694         boolean mUseIncSupp = isIncSupp();
695         long aID = A.getID(mRS);
696         long xID = X.getID(mRS);
697         if (mUseIncSupp) {
698             aID = getDummyAlloc(A);
699             xID = getDummyAlloc(X);
700         }
701         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
702     }
703 
704     /**
705      * DTRMV performs one of the matrix-vector operations
706      * x := A*x   or   x := A**T*x
707      *
708      * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
709      *
710      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
711      * @param TransA The type of transpose applied to matrix A.
712      * @param Diag Specifies whether or not A is unit triangular.
713      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
714      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
715      * @param incX The increment for the elements of vector x, must be larger than zero.
716      */
DTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)717     public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
718         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
719         int N = A.getType().getY();
720 
721         boolean mUseIncSupp = isIncSupp();
722         long aID = A.getID(mRS);
723         long xID = X.getID(mRS);
724         if (mUseIncSupp) {
725             aID = getDummyAlloc(A);
726             xID = getDummyAlloc(X);
727         }
728         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
729     }
730 
731     /**
732      * CTRMV performs one of the matrix-vector operations
733      * x := A*x   or   x := A**T*x   or   x := A**H*x
734      *
735      * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
736      *
737      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
738      * @param TransA The type of transpose applied to matrix A.
739      * @param Diag Specifies whether or not A is unit triangular.
740      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
741      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
742      * @param incX The increment for the elements of vector x, must be larger than zero.
743      */
CTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)744     public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
745         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
746         int N = A.getType().getY();
747 
748         boolean mUseIncSupp = isIncSupp();
749         long aID = A.getID(mRS);
750         long xID = X.getID(mRS);
751         if (mUseIncSupp) {
752             aID = getDummyAlloc(A);
753             xID = getDummyAlloc(X);
754         }
755         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
756     }
757 
758     /**
759      * ZTRMV performs one of the matrix-vector operations
760      * x := A*x   or   x := A**T*x   or   x := A**H*x
761      *
762      * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
763      *
764      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
765      * @param TransA The type of transpose applied to matrix A.
766      * @param Diag Specifies whether or not A is unit triangular.
767      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
768      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
769      * @param incX The increment for the elements of vector x, must be larger than zero.
770      */
ZTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)771     public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
772         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
773         int N = A.getType().getY();
774 
775         boolean mUseIncSupp = isIncSupp();
776         long aID = A.getID(mRS);
777         long xID = X.getID(mRS);
778         if (mUseIncSupp) {
779             aID = getDummyAlloc(A);
780             xID = getDummyAlloc(X);
781         }
782         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
783     }
784 
785     /**
786      * STBMV performs one of the matrix-vector operations
787      * x := A*x   or   x := A**T*x
788      *
789      * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
790      *
791      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
792      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
793      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
794      *           for i in range(0, n):
795      *              for j in range(i, min(i+k+1, n)):
796      *                  b[i, j-i] = a[i, j]
797      *
798      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
799      * @param TransA The type of transpose applied to matrix A.
800      * @param Diag Specifies whether or not A is unit triangular.
801      * @param K The number of off-diagonals of the matrix A
802      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
803      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
804      * @param incX The increment for the elements of vector x, must be larger than zero.
805      */
STBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)806     public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
807         // TBMV has the same requirements as TRMV + K >= 0
808         if (K < 0) {
809             throw new RSRuntimeException("K must be greater than or equal to 0");
810         }
811         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
812         int N = A.getType().getY();
813 
814         boolean mUseIncSupp = isIncSupp();
815         long aID = A.getID(mRS);
816         long xID = X.getID(mRS);
817         if (mUseIncSupp) {
818             aID = getDummyAlloc(A);
819             xID = getDummyAlloc(X);
820         }
821         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
822     }
823 
824     /**
825      * DTBMV performs one of the matrix-vector operations
826      * x := A*x   or   x := A**T*x
827      *
828      * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
829      *
830      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
831      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
832      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
833      *           for i in range(0, n):
834      *              for j in range(i, min(i+k+1, n)):
835      *                  b[i, j-i] = a[i, j]
836      *
837      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
838      * @param TransA The type of transpose applied to matrix A.
839      * @param Diag Specifies whether or not A is unit triangular.
840      * @param K The number of off-diagonals of the matrix A
841      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
842      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
843      * @param incX The increment for the elements of vector x, must be larger than zero.
844      */
DTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)845     public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
846         // TBMV has the same requirements as TRMV + K >= 0
847         if (K < 0) {
848             throw new RSRuntimeException("K must be greater than or equal to 0");
849         }
850         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
851         int N = A.getType().getY();
852 
853         boolean mUseIncSupp = isIncSupp();
854         long aID = A.getID(mRS);
855         long xID = X.getID(mRS);
856         if (mUseIncSupp) {
857             aID = getDummyAlloc(A);
858             xID = getDummyAlloc(X);
859         }
860         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
861     }
862 
863     /**
864      * CTBMV performs one of the matrix-vector operations
865      * x := A*x   or   x := A**T*x   or   x := A**H*x
866      *
867      * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
868      *
869      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
870      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
871      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
872      *           for i in range(0, n):
873      *              for j in range(i, min(i+k+1, n)):
874      *                  b[i, j-i] = a[i, j]
875      *
876      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
877      * @param TransA The type of transpose applied to matrix A.
878      * @param Diag Specifies whether or not A is unit triangular.
879      * @param K The number of off-diagonals of the matrix A
880      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
881      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
882      * @param incX The increment for the elements of vector x, must be larger than zero.
883      */
CTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)884     public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
885         // TBMV has the same requirements as TRMV + K >= 0
886         if (K < 0) {
887             throw new RSRuntimeException("K must be greater than or equal to 0");
888         }
889         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
890         int N = A.getType().getY();
891 
892         boolean mUseIncSupp = isIncSupp();
893         long aID = A.getID(mRS);
894         long xID = X.getID(mRS);
895         if (mUseIncSupp) {
896             aID = getDummyAlloc(A);
897             xID = getDummyAlloc(X);
898         }
899         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
900     }
901 
902     /**
903      * ZTBMV performs one of the matrix-vector operations
904      * x := A*x   or   x := A**T*x   or   x := A**H*x
905      *
906      * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
907      *
908      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
909      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
910      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
911      *           for i in range(0, n):
912      *              for j in range(i, min(i+k+1, n)):
913      *                  b[i, j-i] = a[i, j]
914      *
915      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
916      * @param TransA The type of transpose applied to matrix A.
917      * @param Diag Specifies whether or not A is unit triangular.
918      * @param K The number of off-diagonals of the matrix A
919      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
920      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
921      * @param incX The increment for the elements of vector x, must be larger than zero.
922      */
ZTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)923     public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
924         // TBMV has the same requirements as TRMV + K >= 0
925         if (K < 0) {
926             throw new RSRuntimeException("K must be greater than or equal to 0");
927         }
928         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
929         int N = A.getType().getY();
930 
931         boolean mUseIncSupp = isIncSupp();
932         long aID = A.getID(mRS);
933         long xID = X.getID(mRS);
934         if (mUseIncSupp) {
935             aID = getDummyAlloc(A);
936             xID = getDummyAlloc(X);
937         }
938         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
939     }
940 
941     /**
942      * STPMV performs one of the matrix-vector operations
943      * x := A*x   or   x := A**T*x
944      *
945      * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
946      *
947      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
948      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
949      *       'a' to packed matrix 'b'.
950      *           k = 0
951      *           for i in range(0, n):
952      *              for j in range(i, n):
953      *                  b[k++] = a[i, j]
954      *
955      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
956      * @param TransA The type of transpose applied to matrix A.
957      * @param Diag Specifies whether or not A is unit triangular.
958      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
959      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
960      * @param incX The increment for the elements of vector x, must be larger than zero.
961      */
STPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)962     public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
963         int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
964 
965         boolean mUseIncSupp = isIncSupp();
966         long apID = Ap.getID(mRS);
967         long xID = X.getID(mRS);
968         if (mUseIncSupp) {
969             apID = getDummyAlloc(Ap);
970             xID = getDummyAlloc(X);
971         }
972         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
973     }
974 
975     /**
976      * DTPMV performs one of the matrix-vector operations
977      * x := A*x   or   x := A**T*x
978      *
979      * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
980      *
981      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
982      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
983      *       'a' to packed matrix 'b'.
984      *           k = 0
985      *           for i in range(0, n):
986      *              for j in range(i, n):
987      *                  b[k++] = a[i, j]
988      *
989      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
990      * @param TransA The type of transpose applied to matrix A.
991      * @param Diag Specifies whether or not A is unit triangular.
992      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
993      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
994      * @param incX The increment for the elements of vector x, must be larger than zero.
995      */
DTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)996     public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
997         int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
998 
999         boolean mUseIncSupp = isIncSupp();
1000         long apID = Ap.getID(mRS);
1001         long xID = X.getID(mRS);
1002         if (mUseIncSupp) {
1003             apID = getDummyAlloc(Ap);
1004             xID = getDummyAlloc(X);
1005         }
1006         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1007     }
1008 
1009     /**
1010      * CTPMV performs one of the matrix-vector operations
1011      * x := A*x   or   x := A**T*x   or   x := A**H*x
1012      *
1013      * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
1014      *
1015      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1016      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1017      *       'a' to packed matrix 'b'.
1018      *           k = 0
1019      *           for i in range(0, n):
1020      *              for j in range(i, n):
1021      *                  b[k++] = a[i, j]
1022      *
1023      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1024      * @param TransA The type of transpose applied to matrix A.
1025      * @param Diag Specifies whether or not A is unit triangular.
1026      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
1027      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1028      * @param incX The increment for the elements of vector x, must be larger than zero.
1029      */
CTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1030     public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1031         int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1032 
1033         boolean mUseIncSupp = isIncSupp();
1034         long apID = Ap.getID(mRS);
1035         long xID = X.getID(mRS);
1036         if (mUseIncSupp) {
1037             apID = getDummyAlloc(Ap);
1038             xID = getDummyAlloc(X);
1039         }
1040         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1041     }
1042 
1043     /**
1044      * ZTPMV performs one of the matrix-vector operations
1045      * x := A*x   or   x := A**T*x   or   x := A**H*x
1046      *
1047      * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
1048      *
1049      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1050      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1051      *       'a' to packed matrix 'b'.
1052      *           k = 0
1053      *           for i in range(0, n):
1054      *              for j in range(i, n):
1055      *                  b[k++] = a[i, j]
1056      *
1057      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1058      * @param TransA The type of transpose applied to matrix A.
1059      * @param Diag Specifies whether or not A is unit triangular.
1060      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
1061      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1062      * @param incX The increment for the elements of vector x, must be larger than zero.
1063      */
ZTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1064     public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1065         int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1066 
1067         boolean mUseIncSupp = isIncSupp();
1068         long apID = Ap.getID(mRS);
1069         long xID = X.getID(mRS);
1070         if (mUseIncSupp) {
1071             apID = getDummyAlloc(Ap);
1072             xID = getDummyAlloc(X);
1073         }
1074         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1075     }
1076 
1077     /**
1078      * STRSV solves one of the systems of equations
1079      * A*x = b   or   A**T*x = b
1080      *
1081      * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
1082      *
1083      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1084      * @param TransA The type of transpose applied to matrix A.
1085      * @param Diag Specifies whether or not A is unit triangular.
1086      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1087      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1088      * @param incX The increment for the elements of vector x, must be larger than zero.
1089      */
STRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1090     public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1091         // TRSV is the same as TRMV
1092         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
1093         int N = A.getType().getY();
1094 
1095         boolean mUseIncSupp = isIncSupp();
1096         long aID = A.getID(mRS);
1097         long xID = X.getID(mRS);
1098         if (mUseIncSupp) {
1099             aID = getDummyAlloc(A);
1100             xID = getDummyAlloc(X);
1101         }
1102         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1103 
1104     }
1105 
1106     /**
1107      * DTRSV solves one of the systems of equations
1108      * A*x = b   or   A**T*x = b
1109      *
1110      * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
1111      *
1112      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1113      * @param TransA The type of transpose applied to matrix A.
1114      * @param Diag Specifies whether or not A is unit triangular.
1115      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1116      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1117      * @param incX The increment for the elements of vector x, must be larger than zero.
1118      */
DTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1119     public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1120         // TRSV is the same as TRMV
1121         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
1122         int N = A.getType().getY();
1123 
1124         boolean mUseIncSupp = isIncSupp();
1125         long aID = A.getID(mRS);
1126         long xID = X.getID(mRS);
1127         if (mUseIncSupp) {
1128             aID = getDummyAlloc(A);
1129             xID = getDummyAlloc(X);
1130         }
1131         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1132 
1133     }
1134 
1135     /**
1136      * CTRSV solves one of the systems of equations
1137      * A*x = b   or   A**T*x = b   or   A**H*x = b
1138      *
1139      * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
1140      *
1141      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1142      * @param TransA The type of transpose applied to matrix A.
1143      * @param Diag Specifies whether or not A is unit triangular.
1144      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1145      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1146      * @param incX The increment for the elements of vector x, must be larger than zero.
1147      */
CTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1148     public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1149         // TRSV is the same as TRMV
1150         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
1151         int N = A.getType().getY();
1152 
1153         boolean mUseIncSupp = isIncSupp();
1154         long aID = A.getID(mRS);
1155         long xID = X.getID(mRS);
1156         if (mUseIncSupp) {
1157             aID = getDummyAlloc(A);
1158             xID = getDummyAlloc(X);
1159         }
1160         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1161 
1162     }
1163 
1164     /**
1165      * ZTRSV solves one of the systems of equations
1166      * A*x = b   or   A**T*x = b   or   A**H*x = b
1167      *
1168      * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
1169      *
1170      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1171      * @param TransA The type of transpose applied to matrix A.
1172      * @param Diag Specifies whether or not A is unit triangular.
1173      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
1174      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1175      * @param incX The increment for the elements of vector x, must be larger than zero.
1176      */
ZTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1177     public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1178         // TRSV is the same as TRMV
1179         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
1180         int N = A.getType().getY();
1181 
1182         boolean mUseIncSupp = isIncSupp();
1183         long aID = A.getID(mRS);
1184         long xID = X.getID(mRS);
1185         if (mUseIncSupp) {
1186             aID = getDummyAlloc(A);
1187             xID = getDummyAlloc(X);
1188         }
1189         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1190 
1191     }
1192 
1193     /**
1194      * STBSV solves one of the systems of equations
1195      * A*x = b   or   A**T*x = b
1196      *
1197      * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
1198      *
     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
1202      *           for i in range(0, n):
1203      *              for j in range(i, min(i+k+1, n)):
1204      *                  b[i, j-i] = a[i, j]
1205      *
1206      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1207      * @param TransA The type of transpose applied to matrix A.
1208      * @param Diag Specifies whether or not A is unit triangular.
1209      * @param K The number of off-diagonals of the matrix A
1210      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1211      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1212      * @param incX The increment for the elements of vector x, must be larger than zero.
1213      */
STBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1214     public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1215         // TBSV is the same as TRMV + K >= 0
1216         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
1217         int N = A.getType().getY();
1218         if (K < 0) {
1219             throw new RSRuntimeException("Number of diagonals must be positive");
1220         }
1221 
1222         boolean mUseIncSupp = isIncSupp();
1223         long aID = A.getID(mRS);
1224         long xID = X.getID(mRS);
1225         if (mUseIncSupp) {
1226             aID = getDummyAlloc(A);
1227             xID = getDummyAlloc(X);
1228         }
1229         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1230     }
1231 
1232     /**
1233      * DTBSV solves one of the systems of equations
1234      * A*x = b   or   A**T*x = b
1235      *
1236      * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
1237      *
     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
1241      *           for i in range(0, n):
1242      *              for j in range(i, min(i+k+1, n)):
1243      *                  b[i, j-i] = a[i, j]
1244      *
1245      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1246      * @param TransA The type of transpose applied to matrix A.
1247      * @param Diag Specifies whether or not A is unit triangular.
1248      * @param K The number of off-diagonals of the matrix A
1249      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1250      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1251      * @param incX The increment for the elements of vector x, must be larger than zero.
1252      */
DTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1253     public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1254         // TBSV is the same as TRMV + K >= 0
1255         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
1256         int N = A.getType().getY();
1257         if (K < 0) {
1258             throw new RSRuntimeException("Number of diagonals must be positive");
1259         }
1260 
1261         boolean mUseIncSupp = isIncSupp();
1262         long aID = A.getID(mRS);
1263         long xID = X.getID(mRS);
1264         if (mUseIncSupp) {
1265             aID = getDummyAlloc(A);
1266             xID = getDummyAlloc(X);
1267         }
1268         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1269     }
1270 
1271     /**
1272      * CTBSV solves one of the systems of equations
1273      * A*x = b   or   A**T*x = b   or   A**H*x = b
1274      *
1275      * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
1276      *
     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
1280      *           for i in range(0, n):
1281      *              for j in range(i, min(i+k+1, n)):
1282      *                  b[i, j-i] = a[i, j]
1283      *
1284      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1285      * @param TransA The type of transpose applied to matrix A.
1286      * @param Diag Specifies whether or not A is unit triangular.
1287      * @param K The number of off-diagonals of the matrix A
1288      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1289      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1290      * @param incX The increment for the elements of vector x, must be larger than zero.
1291      */
CTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1292     public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1293         // TBSV is the same as TRMV + K >= 0
1294         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
1295         int N = A.getType().getY();
1296         if (K < 0) {
1297             throw new RSRuntimeException("Number of diagonals must be positive");
1298         }
1299 
1300         boolean mUseIncSupp = isIncSupp();
1301         long aID = A.getID(mRS);
1302         long xID = X.getID(mRS);
1303         if (mUseIncSupp) {
1304             aID = getDummyAlloc(A);
1305             xID = getDummyAlloc(X);
1306         }
1307         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1308     }
1309 
1310     /**
1311      * ZTBSV solves one of the systems of equations
1312      * A*x = b   or   A**T*x = b   or   A**H*x = b
1313      *
1314      * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
1315      *
     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
1319      *           for i in range(0, n):
1320      *              for j in range(i, min(i+k+1, n)):
1321      *                  b[i, j-i] = a[i, j]
1322      *
1323      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1324      * @param TransA The type of transpose applied to matrix A.
1325      * @param Diag Specifies whether or not A is unit triangular.
1326      * @param K The number of off-diagonals of the matrix A
1327      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
1328      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1329      * @param incX The increment for the elements of vector x, must be larger than zero.
1330      */
ZTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1331     public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1332         // TBSV is the same as TRMV + K >= 0
1333         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
1334         int N = A.getType().getY();
1335         if (K < 0) {
1336             throw new RSRuntimeException("Number of diagonals must be positive");
1337         }
1338 
1339         boolean mUseIncSupp = isIncSupp();
1340         long aID = A.getID(mRS);
1341         long xID = X.getID(mRS);
1342         if (mUseIncSupp) {
1343             aID = getDummyAlloc(A);
1344             xID = getDummyAlloc(X);
1345         }
1346         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1347     }
1348 
1349     /**
1350      * STPSV solves one of the systems of equations
1351      * A*x = b   or   A**T*x = b
1352      *
1353      * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
1354      *
     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
     *       'a' to packed matrix 'b'.
1358      *           k = 0
1359      *           for i in range(0, n):
1360      *              for j in range(i, n):
1361      *                  b[k++] = a[i, j]
1362      *
1363      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1364      * @param TransA The type of transpose applied to matrix A.
1365      * @param Diag Specifies whether or not A is unit triangular.
1366      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
1367      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1368      * @param incX The increment for the elements of vector x, must be larger than zero.
1369      */
STPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1370     public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1371         // TPSV is same as TPMV
1372         int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
1373 
1374         boolean mUseIncSupp = isIncSupp();
1375         long apID = Ap.getID(mRS);
1376         long xID = X.getID(mRS);
1377         if (mUseIncSupp) {
1378             apID = getDummyAlloc(Ap);
1379             xID = getDummyAlloc(X);
1380         }
1381         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1382     }
1383 
1384     /**
1385      * DTPSV solves one of the systems of equations
1386      * A*x = b   or   A**T*x = b
1387      *
1388      * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
1389      *
     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
     *       'a' to packed matrix 'b'.
1393      *           k = 0
1394      *           for i in range(0, n):
1395      *              for j in range(i, n):
1396      *                  b[k++] = a[i, j]
1397      *
1398      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1399      * @param TransA The type of transpose applied to matrix A.
1400      * @param Diag Specifies whether or not A is unit triangular.
1401      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
1402      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1403      * @param incX The increment for the elements of vector x, must be larger than zero.
1404      */
DTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1405     public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1406         // TPSV is same as TPMV
1407         int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
1408 
1409         boolean mUseIncSupp = isIncSupp();
1410         long apID = Ap.getID(mRS);
1411         long xID = X.getID(mRS);
1412         if (mUseIncSupp) {
1413             apID = getDummyAlloc(Ap);
1414             xID = getDummyAlloc(X);
1415         }
1416         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1417     }
1418 
1419     /**
1420      * CTPSV solves one of the systems of equations
1421      * A*x = b   or   A**T*x = b   or   A**H*x = b
1422      *
1423      * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
1424      *
     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
     *       'a' to packed matrix 'b'.
1428      *           k = 0
1429      *           for i in range(0, n):
1430      *              for j in range(i, n):
1431      *                  b[k++] = a[i, j]
1432      *
1433      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1434      * @param TransA The type of transpose applied to matrix A.
1435      * @param Diag Specifies whether or not A is unit triangular.
1436      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
1437      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1438      * @param incX The increment for the elements of vector x, must be larger than zero.
1439      */
CTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1440     public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1441         // TPSV is same as TPMV
1442         int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1443 
1444         boolean mUseIncSupp = isIncSupp();
1445         long apID = Ap.getID(mRS);
1446         long xID = X.getID(mRS);
1447         if (mUseIncSupp) {
1448             apID = getDummyAlloc(Ap);
1449             xID = getDummyAlloc(X);
1450         }
1451         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1452     }
1453 
1454     /**
1455      * ZTPSV solves one of the systems of equations
1456      * A*x = b   or   A**T*x = b   or   A**H*x = b
1457      *
1458      * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
1459      *
     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
     *       'a' to packed matrix 'b'.
1463      *           k = 0
1464      *           for i in range(0, n):
1465      *              for j in range(i, n):
1466      *                  b[k++] = a[i, j]
1467      *
1468      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1469      * @param TransA The type of transpose applied to matrix A.
1470      * @param Diag Specifies whether or not A is unit triangular.
1471      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
1472      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1473      * @param incX The increment for the elements of vector x, must be larger than zero.
1474      */
ZTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1475     public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1476         // TPSV is same as TPMV
1477         int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1478 
1479         boolean mUseIncSupp = isIncSupp();
1480         long apID = Ap.getID(mRS);
1481         long xID = X.getID(mRS);
1482         if (mUseIncSupp) {
1483             apID = getDummyAlloc(Ap);
1484             xID = getDummyAlloc(X);
1485         }
1486         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1487     }
1488 
1489     /**
1490      * Level 2, S and D only
1491      */
validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY)1492     static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) {
1493         validateUplo(Uplo);
1494         int N = A.getType().getY();
1495         if (A.getType().getX() != N) {
1496             throw new RSRuntimeException("A must be a square matrix for SYMV");
1497         }
1498         if (!A.getType().getElement().isCompatible(e) ||
1499             !X.getType().getElement().isCompatible(e) ||
1500             !Y.getType().getElement().isCompatible(e) ) {
1501             throw new RSRuntimeException("Called BLAS with wrong Element type");
1502         }
1503         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1504             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1505         }
1506 
1507         if (incX <= 0 || incY <= 0) {
1508             throw new RSRuntimeException("Vector increments must be greater than 0");
1509         }
1510         int expectedXDim = 1 + (N - 1) * incX;
1511         if (X.getType().getX() != expectedXDim) {
1512             throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1513         }
1514         int expectedYDim = 1 + (N - 1) * incY;
1515         if (Y.getType().getX() != expectedYDim) {
1516             throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1517         }
1518         return N;
1519     }
validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY)1520     static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
1521         validateUplo(Uplo);
1522         if (!Ap.getType().getElement().isCompatible(e) ||
1523             !X.getType().getElement().isCompatible(e) ||
1524             !Y.getType().getElement().isCompatible(e)) {
1525             throw new RSRuntimeException("Called BLAS with wrong Element type");
1526         }
1527         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1528             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1529         }
1530 
1531         if (Ap.getType().getY() > 1) {
1532             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1533         }
1534 
1535         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1536         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1537             throw new RSRuntimeException("Invalid dimension for Ap");
1538         }
1539         if (incX <= 0 || incY <= 0) {
1540             throw new RSRuntimeException("Vector increments must be greater than 0");
1541         }
1542         int expectedXDim = 1 + (N - 1) * incX;
1543         if (X.getType().getX() != expectedXDim) {
1544             throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1545         }
1546         int expectedYDim = 1 + (N - 1) * incY;
1547         if (Y.getType().getX() != expectedYDim) {
1548             throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1549         }
1550 
1551         return N;
1552     }
validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)1553     static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1554         if (!A.getType().getElement().isCompatible(e) ||
1555             !X.getType().getElement().isCompatible(e) ||
1556             !Y.getType().getElement().isCompatible(e) ) {
1557             throw new RSRuntimeException("Called BLAS with wrong Element type");
1558         }
1559 
1560         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1561             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1562         }
1563 
1564         int M = A.getType().getY();
1565         int N = A.getType().getX();
1566 
1567         if (N < 1 || M < 1) {
1568             throw new RSRuntimeException("M and N must be 1 or greater for GER");
1569         }
1570         if (incX <= 0 || incY <= 0) {
1571             throw new RSRuntimeException("Vector increments must be greater than 0");
1572         }
1573         int expectedXDim = 1 + (M - 1) * incX;
1574         if (X.getType().getX() != expectedXDim) {
1575             throw new RSRuntimeException("Incorrect vector dimensions for GER");
1576         }
1577         int expectedYDim = 1 + (N - 1) * incY;
1578         if (Y.getType().getX() != expectedYDim) {
1579             throw new RSRuntimeException("Incorrect vector dimensions for GER");
1580         }
1581 
1582 
1583     }
validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A)1584     static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) {
1585         validateUplo(Uplo);
1586         if (!A.getType().getElement().isCompatible(e) ||
1587             !X.getType().getElement().isCompatible(e)) {
1588             throw new RSRuntimeException("Called BLAS with wrong Element type");
1589         }
1590 
1591         int N = A.getType().getX();
1592 
1593         if (X.getType().getY() > 1) {
1594             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1595         }
1596         if (N != A.getType().getY()) {
1597             throw new RSRuntimeException("A must be a symmetric matrix");
1598         }
1599         if (incX <= 0) {
1600             throw new RSRuntimeException("Vector increments must be greater than 0");
1601         }
1602         int expectedXDim = 1 + (N - 1) * incX;
1603         if (X.getType().getX() != expectedXDim) {
1604             throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1605         }
1606         return N;
1607     }
validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap)1608     static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) {
1609         validateUplo(Uplo);
1610         if (!Ap.getType().getElement().isCompatible(e) ||
1611             !X.getType().getElement().isCompatible(e)) {
1612             throw new RSRuntimeException("Called BLAS with wrong Element type");
1613         }
1614         if (X.getType().getY() > 1) {
1615             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1616         }
1617 
1618         if (Ap.getType().getY() > 1) {
1619             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1620         }
1621 
1622         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1623         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1624             throw new RSRuntimeException("Invalid dimension for Ap");
1625         }
1626         if (incX <= 0) {
1627             throw new RSRuntimeException("Vector increments must be greater than 0");
1628         }
1629         int expectedXDim = 1 + (N - 1) * incX;
1630         if (X.getType().getX() != expectedXDim) {
1631             throw new RSRuntimeException("Incorrect vector dimensions for SPR");
1632         }
1633 
1634         return N;
1635     }
1636 
validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A)1637     static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1638         validateUplo(Uplo);
1639         if (!A.getType().getElement().isCompatible(e) ||
1640             !X.getType().getElement().isCompatible(e) ||
1641             !Y.getType().getElement().isCompatible(e)) {
1642             throw new RSRuntimeException("Called BLAS with wrong Element type");
1643         }
1644 
1645         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1646             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1647         }
1648 
1649         int N = A.getType().getX();
1650 
1651         if (N != A.getType().getY()) {
1652             throw new RSRuntimeException("A must be a symmetric matrix");
1653         }
1654         if (incX <= 0 || incY <= 0) {
1655             throw new RSRuntimeException("Vector increments must be greater than 0");
1656         }
1657         int expectedXDim = 1 + (N - 1) * incX;
1658         int expectedYDim = 1 + (N - 1) * incY;
1659         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1660             throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1661         }
1662         return N;
1663 
1664     }
validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1665     static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1666         validateUplo(Uplo);
1667         if (!Ap.getType().getElement().isCompatible(e) ||
1668             !X.getType().getElement().isCompatible(e) ||
1669             !Y.getType().getElement().isCompatible(e)) {
1670             throw new RSRuntimeException("Called BLAS with wrong Element type");
1671         }
1672         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1673             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1674         }
1675 
1676         if (Ap.getType().getY() > 1) {
1677             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1678         }
1679 
1680         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1681         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1682             throw new RSRuntimeException("Invalid dimension for Ap");
1683         }
1684         if (incX <= 0 || incY <= 0) {
1685             throw new RSRuntimeException("Vector increments must be greater than 0");
1686         }
1687         int expectedXDim = 1 + (N - 1) * incX;
1688         int expectedYDim = 1 + (N - 1) * incY;
1689         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1690             throw new RSRuntimeException("Incorrect vector dimensions for SPR2");
1691         }
1692 
1693         return N;
1694     }
1695 
1696     /**
1697      * SSYMV performs the matrix-vector operation
1698      * y := alpha*A*x + beta*y
1699      *
1700      * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
1701      *
1702      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1703      * @param alpha The scalar alpha.
1704      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1705      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1706      * @param incX The increment for the elements of vector x, must be larger than zero.
1707      * @param beta The scalar beta.
1708      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1709      * @param incY The increment for the elements of vector y, must be larger than zero.
1710      */
SSYMV(@plo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1711     public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
1712         int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1713 
1714         boolean mUseIncSupp = isIncSupp();
1715         long aID = A.getID(mRS);
1716         long xID = X.getID(mRS);
1717         long yID = Y.getID(mRS);
1718         if (mUseIncSupp) {
1719             aID = getDummyAlloc(A);
1720             xID = getDummyAlloc(X);
1721             yID = getDummyAlloc(Y);
1722         }
1723         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1724     }
1725 
1726     /**
1727      * SSBMV performs the matrix-vector operation
1728      * y := alpha*A*x + beta*y
1729      *
1730      * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
1731      *
     * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
1735      *           for i in range(0, n):
1736      *              for j in range(i, min(i+k+1, n)):
1737      *                  b[i, j-i] = a[i, j]
1738      *
1739      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1740      * @param K The number of off-diagonals of the matrix A
1741      * @param alpha The scalar alpha.
1742      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1743      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1744      * @param incX The increment for the elements of vector x, must be larger than zero.
1745      * @param beta The scalar beta.
1746      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1747      * @param incY The increment for the elements of vector y, must be larger than zero.
1748      */
SSBMV(@plo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1749     public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
1750         // SBMV is the same as SYMV + K >= 0
1751         if (K < 0) {
1752             throw new RSRuntimeException("K must be greater than or equal to 0");
1753         }
1754         int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1755 
1756         boolean mUseIncSupp = isIncSupp();
1757         long aID = A.getID(mRS);
1758         long xID = X.getID(mRS);
1759         long yID = Y.getID(mRS);
1760         if (mUseIncSupp) {
1761             aID = getDummyAlloc(A);
1762             xID = getDummyAlloc(X);
1763             yID = getDummyAlloc(Y);
1764         }
1765         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1766     }
1767 
1768     /**
1769      * SSPMV performs the matrix-vector operation
1770      * y := alpha*A*x + beta*y
1771      *
1772      * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
1773      *
     * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
     *       'a' to packed matrix 'b'.
1777      *           k = 0
1778      *           for i in range(0, n):
1779      *              for j in range(i, n):
1780      *                  b[k++] = a[i, j]
1781      *
1782      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1783      * @param alpha The scalar alpha.
1784      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1785      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1786      * @param incX The increment for the elements of vector x, must be larger than zero.
1787      * @param beta The scalar beta.
1788      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1789      * @param incY The increment for the elements of vector y, must be larger than zero.
1790      */
SSPMV(@plo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY)1791     public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {
1792         int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY);
1793 
1794         boolean mUseIncSupp = isIncSupp();
1795         long apID = Ap.getID(mRS);
1796         long xID = X.getID(mRS);
1797         long yID = Y.getID(mRS);
1798         if (mUseIncSupp) {
1799             apID = getDummyAlloc(Ap);
1800             xID = getDummyAlloc(X);
1801             yID = getDummyAlloc(Y);
1802         }
1803         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1804     }
1805 
1806     /**
1807      * SGER performs the rank 1 operation
1808      * A := alpha*x*y**T + A
1809      *
1810      * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
1811      *
1812      * @param alpha The scalar alpha.
1813      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1814      * @param incX The increment for the elements of vector x, must be larger than zero.
1815      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1816      * @param incY The increment for the elements of vector y, must be larger than zero.
1817      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1818      */
SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1819     public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1820         int M = A.getType().getY();
1821         int N = A.getType().getX();
1822         validateGER(Element.F32(mRS), X, incX, Y, incY, A);
1823 
1824         boolean mUseIncSupp = isIncSupp();
1825         long aID = A.getID(mRS);
1826         long xID = X.getID(mRS);
1827         long yID = Y.getID(mRS);
1828         if (mUseIncSupp) {
1829             aID = getDummyAlloc(A);
1830             xID = getDummyAlloc(X);
1831             yID = getDummyAlloc(Y);
1832         }
1833         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp);
1834     }
1835 
1836     /**
1837      * SSYR performs the rank 1 operation
1838      * A := alpha*x*x**T + A
1839      *
1840      * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
1841      *
1842      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1843      * @param alpha The scalar alpha.
1844      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1845      * @param incX The increment for the elements of vector x, must be larger than zero.
1846      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1847      */
SSYR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)1848     public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
1849         int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);
1850 
1851         boolean mUseIncSupp = isIncSupp();
1852         long aID = A.getID(mRS);
1853         long xID = X.getID(mRS);
1854         if (mUseIncSupp) {
1855             aID = getDummyAlloc(A);
1856             xID = getDummyAlloc(X);
1857         }
1858         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
1859     }
1860 
1861     /**
1862      * SSPR performs the rank 1 operation
1863      * A := alpha*x*x**T + A
1864      *
1865      * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
1866      *
1867      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1868      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1869      *       'a' to packed matrix 'b'.
1870      *           k = 0
1871      *           for i in range(0, n):
1872      *              for j in range(i, n):
1873      *                  b[k++] = a[i, j]
1874      *
1875      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1876      * @param alpha The scalar alpha.
1877      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1878      * @param incX The increment for the elements of vector x, must be larger than zero.
1879      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1880      */
SSPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)1881     public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
1882         int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap);
1883 
1884         boolean mUseIncSupp = isIncSupp();
1885         long apID = Ap.getID(mRS);
1886         long xID = X.getID(mRS);
1887         if (mUseIncSupp) {
1888             apID = getDummyAlloc(Ap);
1889             xID = getDummyAlloc(X);
1890         }
1891         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
1892     }
1893 
1894     /**
1895      * SSYR2 performs the symmetric rank 2 operation
1896      * A := alpha*x*y**T + alpha*y*x**T + A
1897      *
1898      * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
1899      *
1900      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1901      * @param alpha The scalar alpha.
1902      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1903      * @param incX The increment for the elements of vector x, must be larger than zero.
1904      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1905      * @param incY The increment for the elements of vector y, must be larger than zero.
1906      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1907      */
SSYR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1908     public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1909         int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A);
1910 
1911         boolean mUseIncSupp = isIncSupp();
1912         long aID = A.getID(mRS);
1913         long xID = X.getID(mRS);
1914         long yID = Y.getID(mRS);
1915         if (mUseIncSupp) {
1916             aID = getDummyAlloc(A);
1917             xID = getDummyAlloc(X);
1918             yID = getDummyAlloc(Y);
1919         }
1920         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp);
1921     }
1922 
1923     /**
1924      * SSPR2 performs the symmetric rank 2 operation
1925      * A := alpha*x*y**T + alpha*y*x**T + A
1926      *
1927      * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
1928      *
1929      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1930      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1931      *       'a' to packed matrix 'b'.
1932      *           k = 0
1933      *           for i in range(0, n):
1934      *              for j in range(i, n):
1935      *                  b[k++] = a[i, j]
1936      *
1937      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1938      * @param alpha The scalar alpha.
1939      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1940      * @param incX The increment for the elements of vector x, must be larger than zero.
1941      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1942      * @param incY The increment for the elements of vector y, must be larger than zero.
1943      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1944      */
SSPR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1945     public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1946         int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap);
1947 
1948         boolean mUseIncSupp = isIncSupp();
1949         long apID = Ap.getID(mRS);
1950         long xID = X.getID(mRS);
1951         long yID = Y.getID(mRS);
1952         if (mUseIncSupp) {
1953             apID = getDummyAlloc(Ap);
1954             xID = getDummyAlloc(X);
1955             yID = getDummyAlloc(Y);
1956         }
1957         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp);
1958     }
1959 
1960     /**
1961      * DSYMV performs the matrix-vector operation
1962      * y := alpha*A*x + beta*y
1963      *
1964      * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
1965      *
1966      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1967      * @param alpha The scalar alpha.
1968      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1969      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1970      * @param incX The increment for the elements of vector x, must be larger than zero.
1971      * @param beta The scalar beta.
1972      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1973      * @param incY The increment for the elements of vector y, must be larger than zero.
1974      */
DSYMV(@plo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)1975     public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
1976         int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
1977 
1978         boolean mUseIncSupp = isIncSupp();
1979         long aID = A.getID(mRS);
1980         long xID = X.getID(mRS);
1981         long yID = Y.getID(mRS);
1982         if (mUseIncSupp) {
1983             aID = getDummyAlloc(A);
1984             xID = getDummyAlloc(X);
1985             yID = getDummyAlloc(Y);
1986         }
1987         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1988     }
1989 
1990     /**
1991      * DSBMV performs the matrix-vector operation
1992      * y := alpha*A*x + beta*y
1993      *
1994      * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
1995      *
1996      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1997      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1998      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1999      *           for i in range(0, n):
2000      *              for j in range(i, min(i+k+1, n)):
2001      *                  b[i, j-i] = a[i, j]
2002      *
2003      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2004      * @param K The number of off-diagonals of the matrix A
2005      * @param alpha The scalar alpha.
2006      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2007      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2008      * @param incX The increment for the elements of vector x, must be larger than zero.
2009      * @param beta The scalar beta.
2010      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2011      * @param incY The increment for the elements of vector y, must be larger than zero.
2012      */
DSBMV(@plo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)2013     public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
2014         // SBMV is the same as SYMV + K >= 0
2015         if (K < 0) {
2016             throw new RSRuntimeException("K must be greater than or equal to 0");
2017         }
2018         int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
2019 
2020         boolean mUseIncSupp = isIncSupp();
2021         long aID = A.getID(mRS);
2022         long xID = X.getID(mRS);
2023         long yID = Y.getID(mRS);
2024         if (mUseIncSupp) {
2025             aID = getDummyAlloc(A);
2026             xID = getDummyAlloc(X);
2027             yID = getDummyAlloc(Y);
2028         }
2029         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
2030     }
2031 
2032     /**
2033      * DSPMV performs the matrix-vector operation
2034      * y := alpha*A*x + beta*y
2035      *
2036      * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
2037      *
2038      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2039      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2040      *       'a' to packed matrix 'b'.
2041      *           k = 0
2042      *           for i in range(0, n):
2043      *              for j in range(i, n):
2044      *                  b[k++] = a[i, j]
2045      *
2046      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2047      * @param alpha The scalar alpha.
2048      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
2049      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2050      * @param incX The increment for the elements of vector x, must be larger than zero.
2051      * @param beta The scalar beta.
2052      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2053      * @param incY The increment for the elements of vector y, must be larger than zero.
2054      */
DSPMV(@plo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY)2055     public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {
2056         int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY);
2057 
2058         boolean mUseIncSupp = isIncSupp();
2059         long apID = Ap.getID(mRS);
2060         long xID = X.getID(mRS);
2061         long yID = Y.getID(mRS);
2062         if (mUseIncSupp) {
2063             apID = getDummyAlloc(Ap);
2064             xID = getDummyAlloc(X);
2065             yID = getDummyAlloc(Y);
2066         }
2067         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
2068     }
2069 
2070     /**
2071      * DGER performs the rank 1 operation
2072      * A := alpha*x*y**T + A
2073      *
2074      * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
2075      *
2076      * @param alpha The scalar alpha.
2077      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2078      * @param incX The increment for the elements of vector x, must be larger than zero.
2079      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2080      * @param incY The increment for the elements of vector y, must be larger than zero.
2081      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2082      */
DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2083     public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2084         int M = A.getType().getY();
2085         int N = A.getType().getX();
2086         validateGER(Element.F64(mRS), X, incX, Y, incY, A);
2087 
2088         boolean mUseIncSupp = isIncSupp();
2089         long aID = A.getID(mRS);
2090         long xID = X.getID(mRS);
2091         long yID = Y.getID(mRS);
2092         if (mUseIncSupp) {
2093             aID = getDummyAlloc(A);
2094             xID = getDummyAlloc(X);
2095             yID = getDummyAlloc(Y);
2096         }
2097         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp);
2098     }
2099 
2100     /**
2101      * DSYR performs the rank 1 operation
2102      * A := alpha*x*x**T + A
2103      *
2104      * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
2105      *
2106      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2107      * @param alpha The scalar alpha.
2108      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2109      * @param incX The increment for the elements of vector x, must be larger than zero.
2110      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2111      */
DSYR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)2112     public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
2113         int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);
2114 
2115         boolean mUseIncSupp = isIncSupp();
2116         long aID = A.getID(mRS);
2117         long xID = X.getID(mRS);
2118         if (mUseIncSupp) {
2119             aID = getDummyAlloc(A);
2120             xID = getDummyAlloc(X);
2121         }
2122         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
2123     }
2124 
2125     /**
2126      * DSPR performs the rank 1 operation
2127      * A := alpha*x*x**T + A
2128      *
2129      * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
2130      *
2131      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2132      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2133      *       'a' to packed matrix 'b'.
2134      *           k = 0
2135      *           for i in range(0, n):
2136      *              for j in range(i, n):
2137      *                  b[k++] = a[i, j]
2138      *
2139      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2140      * @param alpha The scalar alpha.
2141      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2142      * @param incX The increment for the elements of vector x, must be larger than zero.
2143      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
2144      */
DSPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)2145     public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
2146         int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap);
2147 
2148         boolean mUseIncSupp = isIncSupp();
2149         long apID = Ap.getID(mRS);
2150         long xID = X.getID(mRS);
2151         if (mUseIncSupp) {
2152             apID = getDummyAlloc(Ap);
2153             xID = getDummyAlloc(X);
2154         }
2155         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
2156     }
2157 
2158     /**
2159      * DSYR2 performs the symmetric rank 2 operation
2160      * A := alpha*x*y**T + alpha*y*x**T + A
2161      *
2162      * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
2163      *
2164      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2165      * @param alpha The scalar alpha.
2166      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2167      * @param incX The increment for the elements of vector x, must be larger than zero.
2168      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2169      * @param incY The increment for the elements of vector y, must be larger than zero.
2170      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2171      */
DSYR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2172     public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2173         int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A);
2174 
2175         boolean mUseIncSupp = isIncSupp();
2176         long aID = A.getID(mRS);
2177         long xID = X.getID(mRS);
2178         long yID = Y.getID(mRS);
2179         if (mUseIncSupp) {
2180             aID = getDummyAlloc(A);
2181             xID = getDummyAlloc(X);
2182             yID = getDummyAlloc(Y);
2183         }
2184         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2185     }
2186 
2187     /**
2188      * DSPR2 performs the symmetric rank 2 operation
2189      * A := alpha*x*y**T + alpha*y*x**T + A
2190      *
2191      * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
2192      *
2193      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2194      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2195      *       'a' to packed matrix 'b'.
2196      *           k = 0
2197      *           for i in range(0, n):
2198      *              for j in range(i, n):
2199      *                  b[k++] = a[i, j]
2200      *
2201      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2202      * @param alpha The scalar alpha.
2203      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2204      * @param incX The increment for the elements of vector x, must be larger than zero.
2205      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2206      * @param incY The increment for the elements of vector y, must be larger than zero.
2207      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
2208      */
DSPR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2209     public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2210         int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap);
2211 
2212         boolean mUseIncSupp = isIncSupp();
2213         long apID = Ap.getID(mRS);
2214         long xID = X.getID(mRS);
2215         long yID = Y.getID(mRS);
2216         if (mUseIncSupp) {
2217             apID = getDummyAlloc(Ap);
2218             xID = getDummyAlloc(X);
2219             yID = getDummyAlloc(Y);
2220         }
2221         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp);
2222     }
2223 
2224 
2225     /**
2226      * Level 2, C and Z only
2227      */
2228 
validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)2229     static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2230         if (!A.getType().getElement().isCompatible(e) ||
2231             !X.getType().getElement().isCompatible(e) ||
2232             !Y.getType().getElement().isCompatible(e)) {
2233             throw new RSRuntimeException("Called BLAS with wrong Element type");
2234         }
2235         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
2236             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
2237         }
2238 
2239         int M = A.getType().getY();
2240         int N = A.getType().getX();
2241         if (incX <= 0 || incY <= 0) {
2242             throw new RSRuntimeException("Vector increments must be greater than 0");
2243         }
2244         int expectedXDim = 1 + (M - 1) * incX;
2245         if (X.getType().getX() != expectedXDim) {
2246             throw new RSRuntimeException("Incorrect vector dimensions for GERU");
2247         }
2248         int expectedYDim = 1 + (N - 1) * incY;
2249         if (Y.getType().getX() != expectedYDim) {
2250             throw new RSRuntimeException("Incorrect vector dimensions for GERU");
2251         }
2252 
2253     }
2254 
2255     /**
2256      * CHEMV performs the matrix-vector operation
2257      * y := alpha*A*x + beta*y
2258      *
2259      * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
2260      *
2261      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2262      * @param alpha The scalar alpha.
2263      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2264      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2265      * @param incX The increment for the elements of vector x, must be larger than zero.
2266      * @param beta The scalar beta.
2267      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2268      * @param incY The increment for the elements of vector y, must be larger than zero.
2269      */
CHEMV(@plo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)2270     public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
2271         // HEMV is the same as SYR2 validation-wise
2272         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2273 
2274         boolean mUseIncSupp = isIncSupp();
2275         long aID = A.getID(mRS);
2276         long xID = X.getID(mRS);
2277         long yID = Y.getID(mRS);
2278         if (mUseIncSupp) {
2279             aID = getDummyAlloc(A);
2280             xID = getDummyAlloc(X);
2281             yID = getDummyAlloc(Y);
2282         }
2283         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2284     }
2285 
2286     /**
2287      * CHBMV performs the matrix-vector operation
2288      * y := alpha*A*x + beta*y
2289      *
2290      * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
2291      *
2292      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2293      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
2294      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
2295      *           for i in range(0, n):
2296      *              for j in range(i, min(i+k+1, n)):
2297      *                  b[i, j-i] = a[i, j]
2298      *
2299      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2300      * @param K The number of off-diagonals of the matrix A
2301      * @param alpha The scalar alpha.
2302      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2303      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2304      * @param incX The increment for the elements of vector x, must be larger than zero.
2305      * @param beta The scalar beta.
2306      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2307      * @param incY The increment for the elements of vector y, must be larger than zero.
2308      */
CHBMV(@plo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)2309     public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
2310         // HBMV is the same as SYR2 validation-wise
2311         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2312         if (K < 0) {
2313             throw new RSRuntimeException("K must be 0 or greater for HBMV");
2314         }
2315 
2316         boolean mUseIncSupp = isIncSupp();
2317         long aID = A.getID(mRS);
2318         long xID = X.getID(mRS);
2319         long yID = Y.getID(mRS);
2320         if (mUseIncSupp) {
2321             aID = getDummyAlloc(A);
2322             xID = getDummyAlloc(X);
2323             yID = getDummyAlloc(Y);
2324         }
2325         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2326     }
2327 
2328     /**
2329      * CHPMV performs the matrix-vector operation
2330      * y := alpha*A*x + beta*y
2331      *
2332      * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
2333      *
2334      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2335      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2336      *       'a' to packed matrix 'b'.
2337      *           k = 0
2338      *           for i in range(0, n):
2339      *              for j in range(i, n):
2340      *                  b[k++] = a[i, j]
2341      *
2342      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2343      * @param alpha The scalar alpha.
2344      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2345      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2346      * @param incX The increment for the elements of vector x, must be larger than zero.
2347      * @param beta The scalar beta.
2348      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2349      * @param incY The increment for the elements of vector y, must be larger than zero.
2350      */
CHPMV(@plo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY)2351     public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
2352         // HPMV is the same as SPR2
2353         int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
2354 
2355         boolean mUseIncSupp = isIncSupp();
2356         long apID = Ap.getID(mRS);
2357         long xID = X.getID(mRS);
2358         long yID = Y.getID(mRS);
2359         if (mUseIncSupp) {
2360             apID = getDummyAlloc(Ap);
2361             xID = getDummyAlloc(X);
2362             yID = getDummyAlloc(Y);
2363         }
2364         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2365     }
2366 
2367     /**
2368      * CGERU performs the rank 1 operation
2369      * A := alpha*x*y**T + A
2370      *
2371      * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
2372      *
2373      * @param alpha The scalar alpha.
2374      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2375      * @param incX The increment for the elements of vector x, must be larger than zero.
2376      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2377      * @param incY The increment for the elements of vector y, must be larger than zero.
2378      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2379      */
CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2380     public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2381         validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
2382         int M = A.getType().getY();
2383         int N = A.getType().getX();
2384 
2385         boolean mUseIncSupp = isIncSupp();
2386         long aID = A.getID(mRS);
2387         long xID = X.getID(mRS);
2388         long yID = Y.getID(mRS);
2389         if (mUseIncSupp) {
2390             aID = getDummyAlloc(A);
2391             xID = getDummyAlloc(X);
2392             yID = getDummyAlloc(Y);
2393         }
2394         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2395     }
2396 
2397     /**
2398      * CGERC performs the rank 1 operation
2399      * A := alpha*x*y**H + A
2400      *
2401      * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
2402      *
2403      * @param alpha The scalar alpha.
2404      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2405      * @param incX The increment for the elements of vector x, must be larger than zero.
2406      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2407      * @param incY The increment for the elements of vector y, must be larger than zero.
2408      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2409      */
CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2410     public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2411         // same as GERU
2412         validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
2413         int M = A.getType().getY();
2414         int N = A.getType().getX();
2415 
2416         boolean mUseIncSupp = isIncSupp();
2417         long aID = A.getID(mRS);
2418         long xID = X.getID(mRS);
2419         long yID = Y.getID(mRS);
2420         if (mUseIncSupp) {
2421             aID = getDummyAlloc(A);
2422             xID = getDummyAlloc(X);
2423             yID = getDummyAlloc(Y);
2424         }
2425         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2426     }
2427 
2428     /**
2429      * CHER performs the rank 1 operation
2430      * A := alpha*x*x**H + A
2431      *
2432      * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
2433      *
2434      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2435      * @param alpha The scalar alpha.
2436      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2437      * @param incX The increment for the elements of vector x, must be larger than zero.
2438      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2439      */
CHER(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)2440     public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
2441         // same as SYR
2442         int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A);
2443 
2444         boolean mUseIncSupp = isIncSupp();
2445         long aID = A.getID(mRS);
2446         long xID = X.getID(mRS);
2447         if (mUseIncSupp) {
2448             aID = getDummyAlloc(A);
2449             xID = getDummyAlloc(X);
2450         }
2451         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp);
2452     }
2453 
2454     /**
2455      * CHPR performs the rank 1 operation
2456      * A := alpha*x*x**H + A
2457      *
2458      * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
2459      *
2460      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2461      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2462      *       'a' to packed matrix 'b'.
2463      *           k = 0
2464      *           for i in range(0, n):
2465      *              for j in range(i, n):
2466      *                  b[k++] = a[i, j]
2467      *
2468      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2469      * @param alpha The scalar alpha.
2470      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2471      * @param incX The increment for the elements of vector x, must be larger than zero.
2472      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2473      */
CHPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)2474     public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
2475         // equivalent to SPR for validation
2476         int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap);
2477 
2478         boolean mUseIncSupp = isIncSupp();
2479         long apID = Ap.getID(mRS);
2480         long xID = X.getID(mRS);
2481         if (mUseIncSupp) {
2482             apID = getDummyAlloc(Ap);
2483             xID = getDummyAlloc(X);
2484         }
2485         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp);
2486     }
2487 
2488     /**
2489      * CHER2 performs the symmetric rank 2 operation
2490      * A := alpha*x*y**H + alpha*y*x**H + A
2491      *
2492      * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
2493      *
2494      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2495      * @param alpha The scalar alpha.
2496      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2497      * @param incX The increment for the elements of vector x, must be larger than zero.
2498      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2499      * @param incY The increment for the elements of vector y, must be larger than zero.
2500      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2501      */
CHER2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2502     public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2503         // same as SYR2
2504         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2505 
2506         boolean mUseIncSupp = isIncSupp();
2507         long aID = A.getID(mRS);
2508         long xID = X.getID(mRS);
2509         long yID = Y.getID(mRS);
2510         if (mUseIncSupp) {
2511             aID = getDummyAlloc(A);
2512             xID = getDummyAlloc(X);
2513             yID = getDummyAlloc(Y);
2514         }
2515         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2516     }
2517 
2518     /**
2519      * CHPR2 performs the symmetric rank 2 operation
2520      * A := alpha*x*y**H + alpha*y*x**H + A
2521      *
2522      * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
2523      *
2524      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2525      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2526      *       'a' to packed matrix 'b'.
2527      *           k = 0
2528      *           for i in range(0, n):
2529      *              for j in range(i, n):
2530      *                  b[k++] = a[i, j]
2531      *
2532      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2533      * @param alpha The scalar alpha.
2534      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2535      * @param incX The increment for the elements of vector x, must be larger than zero.
2536      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2537      * @param incY The increment for the elements of vector y, must be larger than zero.
2538      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2539      */
CHPR2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2540     public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2541         // same as SPR2
2542         int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
2543 
2544         boolean mUseIncSupp = isIncSupp();
2545         long apID = Ap.getID(mRS);
2546         long xID = X.getID(mRS);
2547         long yID = Y.getID(mRS);
2548         if (mUseIncSupp) {
2549             apID = getDummyAlloc(Ap);
2550             xID = getDummyAlloc(X);
2551             yID = getDummyAlloc(Y);
2552         }
2553         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp);
2554     }
2555 
2556     /**
2557      * ZHEMV performs the matrix-vector operation
2558      * y := alpha*A*x + beta*y
2559      *
2560      * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
2561      *
2562      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2563      * @param alpha The scalar alpha.
2564      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2565      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2566      * @param incX The increment for the elements of vector x, must be larger than zero.
2567      * @param beta The scalar beta.
2568      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2569      * @param incY The increment for the elements of vector y, must be larger than zero.
2570      */
ZHEMV(@plo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2571     public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2572         // HEMV is the same as SYR2 validation-wise
2573         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2574 
2575         boolean mUseIncSupp = isIncSupp();
2576         long aID = A.getID(mRS);
2577         long xID = X.getID(mRS);
2578         long yID = Y.getID(mRS);
2579         if (mUseIncSupp) {
2580             aID = getDummyAlloc(A);
2581             xID = getDummyAlloc(X);
2582             yID = getDummyAlloc(Y);
2583         }
2584         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2585     }
2586 
2587     /**
2588      * ZHBMV performs the matrix-vector operation
2589      * y := alpha*A*x + beta*y
2590      *
2591      * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
2592      *
2593      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2594      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
2595      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
2596      *           for i in range(0, n):
2597      *              for j in range(i, min(i+k+1, n)):
2598      *                  b[i, j-i] = a[i, j]
2599      *
2600      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2601      * @param K The number of off-diagonals of the matrix A
2602      * @param alpha The scalar alpha.
2603      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2604      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2605      * @param incX The increment for the elements of vector x, must be larger than zero.
2606      * @param beta The scalar beta.
2607      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2608      * @param incY The increment for the elements of vector y, must be larger than zero.
2609      */
ZHBMV(@plo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2610     public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2611         // HBMV is the same as SYR2 validation-wise
2612         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2613         if (K < 0) {
2614             throw new RSRuntimeException("K must be 0 or greater for HBMV");
2615         }
2616 
2617         boolean mUseIncSupp = isIncSupp();
2618         long aID = A.getID(mRS);
2619         long xID = X.getID(mRS);
2620         long yID = Y.getID(mRS);
2621         if (mUseIncSupp) {
2622             aID = getDummyAlloc(A);
2623             xID = getDummyAlloc(X);
2624             yID = getDummyAlloc(Y);
2625         }
2626         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2627     }
2628 
2629     /**
2630      * ZHPMV performs the matrix-vector operation
2631      * y := alpha*A*x + beta*y
2632      *
2633      * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
2634      *
2635      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2636      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2637      *       'a' to packed matrix 'b'.
2638      *           k = 0
2639      *           for i in range(0, n):
2640      *              for j in range(i, n):
2641      *                  b[k++] = a[i, j]
2642      *
2643      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2644      * @param alpha The scalar alpha.
2645      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2646      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2647      * @param incX The increment for the elements of vector x, must be larger than zero.
2648      * @param beta The scalar beta.
2649      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2650      * @param incY The increment for the elements of vector y, must be larger than zero.
2651      */
ZHPMV(@plo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2652     public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2653         // HPMV is the same as SPR2
2654         int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2655 
2656         boolean mUseIncSupp = isIncSupp();
2657         long apID = Ap.getID(mRS);
2658         long xID = X.getID(mRS);
2659         long yID = Y.getID(mRS);
2660         if (mUseIncSupp) {
2661             apID = getDummyAlloc(Ap);
2662             xID = getDummyAlloc(X);
2663             yID = getDummyAlloc(Y);
2664         }
2665         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2666     }
2667 
2668     /**
2669      * ZGERU performs the rank 1 operation
2670      * A := alpha*x*y**T + A
2671      *
2672      * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
2673      *
2674      * @param alpha The scalar alpha.
2675      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2676      * @param incX The increment for the elements of vector x, must be larger than zero.
2677      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2678      * @param incY The increment for the elements of vector y, must be larger than zero.
2679      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2680      */
ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2681     public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2682         validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2683         int M = A.getType().getY();
2684         int N = A.getType().getX();
2685 
2686         boolean mUseIncSupp = isIncSupp();
2687         long aID = A.getID(mRS);
2688         long xID = X.getID(mRS);
2689         long yID = Y.getID(mRS);
2690         if (mUseIncSupp) {
2691             aID = getDummyAlloc(A);
2692             xID = getDummyAlloc(X);
2693             yID = getDummyAlloc(Y);
2694         }
2695         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2696     }
2697 
2698     /**
2699      * ZGERC performs the rank 1 operation
2700      * A := alpha*x*y**H + A
2701      *
2702      * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
2703      *
2704      * @param alpha The scalar alpha.
2705      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2706      * @param incX The increment for the elements of vector x, must be larger than zero.
2707      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2708      * @param incY The increment for the elements of vector y, must be larger than zero.
2709      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2710      */
ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2711     public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2712         // same as GERU
2713         validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2714         int M = A.getType().getY();
2715         int N = A.getType().getX();
2716 
2717         boolean mUseIncSupp = isIncSupp();
2718         long aID = A.getID(mRS);
2719         long xID = X.getID(mRS);
2720         long yID = Y.getID(mRS);
2721         if (mUseIncSupp) {
2722             aID = getDummyAlloc(A);
2723             xID = getDummyAlloc(X);
2724             yID = getDummyAlloc(Y);
2725         }
2726         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2727     }
2728 
2729     /**
2730      * ZHER performs the rank 1 operation
2731      * A := alpha*x*x**H + A
2732      *
2733      * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
2734      *
2735      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2736      * @param alpha The scalar alpha.
2737      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2738      * @param incX The increment for the elements of vector x, must be larger than zero.
2739      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2740      */
ZHER(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)2741     public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
2742         // same as SYR
2743         int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A);
2744 
2745         boolean mUseIncSupp = isIncSupp();
2746         long aID = A.getID(mRS);
2747         long xID = X.getID(mRS);
2748         if (mUseIncSupp) {
2749             aID = getDummyAlloc(A);
2750             xID = getDummyAlloc(X);
2751         }
2752         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp);
2753     }
2754 
2755     /**
2756      * ZHPR performs the rank 1 operation
2757      * A := alpha*x*x**H + A
2758      *
2759      * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
2760      *
2761      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2762      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2763      *       'a' to packed matrix 'b'.
2764      *           k = 0
2765      *           for i in range(0, n):
2766      *              for j in range(i, n):
2767      *                  b[k++] = a[i, j]
2768      *
2769      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2770      * @param alpha The scalar alpha.
2771      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2772      * @param incX The increment for the elements of vector x, must be larger than zero.
2773      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2774      */
ZHPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)2775     public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
2776         // equivalent to SPR for validation
2777         int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap);
2778 
2779         boolean mUseIncSupp = isIncSupp();
2780         long apID = Ap.getID(mRS);
2781         long xID = X.getID(mRS);
2782         if (mUseIncSupp) {
2783             apID = getDummyAlloc(Ap);
2784             xID = getDummyAlloc(X);
2785         }
2786         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp);
2787     }
2788 
2789     /**
2790      * ZHER2 performs the symmetric rank 2 operation
2791      * A := alpha*x*y**H + alpha*y*x**H + A
2792      *
2793      * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
2794      *
2795      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2796      * @param alpha The scalar alpha.
2797      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2798      * @param incX The increment for the elements of vector x, must be larger than zero.
2799      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2800      * @param incY The increment for the elements of vector y, must be larger than zero.
2801      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2802      */
ZHER2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2803     public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2804         // same as SYR2
2805         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2806 
2807         boolean mUseIncSupp = isIncSupp();
2808         long aID = A.getID(mRS);
2809         long xID = X.getID(mRS);
2810         long yID = Y.getID(mRS);
2811         if (mUseIncSupp) {
2812             aID = getDummyAlloc(A);
2813             xID = getDummyAlloc(X);
2814             yID = getDummyAlloc(Y);
2815         }
2816         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2817     }
2818 
2819     /**
2820      * ZHPR2 performs the symmetric rank 2 operation
2821      * A := alpha*x*y**H + alpha*y*x**H + A
2822      *
2823      * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
2824      *
2825      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2826      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2827      *       'a' to packed matrix 'b'.
2828      *           k = 0
2829      *           for i in range(0, n):
2830      *              for j in range(i, n):
2831      *                  b[k++] = a[i, j]
2832      *
2833      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2834      * @param alpha The scalar alpha.
2835      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2836      * @param incX The increment for the elements of vector x, must be larger than zero.
2837      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2838      * @param incY The increment for the elements of vector y, must be larger than zero.
2839      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2840      */
ZHPR2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2841     public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2842         // same as SPR2
2843         int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2844 
2845         boolean mUseIncSupp = isIncSupp();
2846         long apID = Ap.getID(mRS);
2847         long xID = X.getID(mRS);
2848         long yID = Y.getID(mRS);
2849         if (mUseIncSupp) {
2850             apID = getDummyAlloc(Ap);
2851             xID = getDummyAlloc(X);
2852             yID = getDummyAlloc(Y);
2853         }
2854         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp);
2855     }
2856 
2857 
2858     /**
2859      * Level 3 BLAS
2860      */
2861 
validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C)2862     static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
2863         int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;
2864         if ((A != null && !A.getType().getElement().isCompatible(e)) ||
2865             (B != null && !B.getType().getElement().isCompatible(e)) ||
2866             (C != null && !C.getType().getElement().isCompatible(e))) {
2867             throw new RSRuntimeException("Called BLAS with wrong Element type");
2868         }
2869         if (C == null) {
2870             //since matrix C is used to store the result, it cannot be null.
2871             throw new RSRuntimeException("Allocation C cannot be null");
2872         }
2873         cM = C.getType().getY();
2874         cN = C.getType().getX();
2875 
2876         if (Side == RIGHT) {
2877             if ((A == null && B != null) || (A != null && B == null)) {
2878                 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa");
2879             }
2880             if (B != null) {
2881                 bM = A.getType().getY();
2882                 bN = A.getType().getX();
2883             }
2884             if (A != null) {
2885                 aM = B.getType().getY();
2886                 aN = B.getType().getX();
2887             }
2888         } else {
2889             if (A != null) {
2890                 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) {
2891                     aN = A.getType().getY();
2892                     aM = A.getType().getX();
2893                 } else {
2894                     aM = A.getType().getY();
2895                     aN = A.getType().getX();
2896                 }
2897             }
2898             if (B != null) {
2899                 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) {
2900                     bN = B.getType().getY();
2901                     bM = B.getType().getX();
2902                 } else {
2903                     bM = B.getType().getY();
2904                     bN = B.getType().getX();
2905                 }
2906             }
2907         }
2908         if (A != null && B != null && C != null) {
2909             if (aN != bM || aM != cM || bN != cN) {
2910                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2911             }
2912         } else if (A != null && C != null) {
2913             // A and C only, for SYRK
2914             if (cM != cN) {
2915                 throw new RSRuntimeException("Matrix C is not symmetric");
2916             }
2917             if (aM != cM) {
2918                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2919             }
2920         } else if (A != null && B != null) {
2921             // A and B only
2922             if (aN != bM) {
2923                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2924             }
2925         }
2926 
2927     }
2928 
2929     /**
2930      * SGEMM performs one of the matrix-matrix operations
2931      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
2932      *
2933      * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
2934      *
2935      * @param TransA The type of transpose applied to matrix A.
2936      * @param TransB The type of transpose applied to matrix B.
2937      * @param alpha The scalar alpha.
2938      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2939      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2940      * @param beta The scalar beta.
2941      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2942      */
SGEMM(@ranspose int TransA, @Transpose int TransB, float alpha, Allocation A, Allocation B, float beta, Allocation C)2943     public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A,
2944                       Allocation B, float beta, Allocation C) {
2945         validateTranspose(TransA);
2946         validateTranspose(TransB);
2947         validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C);
2948 
2949         int M = -1, N = -1, K = -1;
2950         if (TransA != NO_TRANSPOSE) {
2951             M = A.getType().getX();
2952             K = A.getType().getY();
2953         } else {
2954             M = A.getType().getY();
2955             K = A.getType().getX();
2956         }
2957         if (TransB != NO_TRANSPOSE) {
2958             N = B.getType().getY();
2959         } else {
2960             N = B.getType().getX();
2961         }
2962 
2963         boolean mUseIncSupp = isIncSupp();
2964         long aID = A.getID(mRS);
2965         long bID = B.getID(mRS);
2966         long cID = C.getID(mRS);
2967         if (mUseIncSupp) {
2968             aID = getDummyAlloc(A);
2969             bID = getDummyAlloc(B);
2970             cID = getDummyAlloc(C);
2971         }
2972         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, aID, bID,
2973                                         beta, cID, 0, 0, 0, 0, mUseIncSupp);
2974     }
2975 
2976     /**
2977      * DGEMM performs one of the matrix-matrix operations
2978      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
2979      *
2980      * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
2981      *
2982      * @param TransA The type of transpose applied to matrix A.
2983      * @param TransB The type of transpose applied to matrix B.
2984      * @param alpha The scalar alpha.
2985      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2986      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2987      * @param beta The scalar beta.
2988      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2989      */
DGEMM(@ranspose int TransA, @Transpose int TransB, double alpha, Allocation A, Allocation B, double beta, Allocation C)2990     public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A,
2991                       Allocation B, double beta, Allocation C) {
2992         validateTranspose(TransA);
2993         validateTranspose(TransB);
2994         validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C);
2995         int M = -1, N = -1, K = -1;
2996         if (TransA != NO_TRANSPOSE) {
2997             M = A.getType().getX();
2998             K = A.getType().getY();
2999         } else {
3000             M = A.getType().getY();
3001             K = A.getType().getX();
3002         }
3003         if (TransB != NO_TRANSPOSE) {
3004             N = B.getType().getY();
3005         } else {
3006             N = B.getType().getX();
3007         }
3008 
3009         boolean mUseIncSupp = isIncSupp();
3010         long aID = A.getID(mRS);
3011         long bID = B.getID(mRS);
3012         long cID = C.getID(mRS);
3013         if (mUseIncSupp) {
3014             aID = getDummyAlloc(A);
3015             bID = getDummyAlloc(B);
3016             cID = getDummyAlloc(C);
3017         }
3018         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, aID, bID,
3019                                         beta, cID, 0, 0, 0, 0, mUseIncSupp);
3020     }
3021 
3022     /**
3023      * CGEMM performs one of the matrix-matrix operations
3024      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3025      *
3026      * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
3027      *
3028      * @param TransA The type of transpose applied to matrix A.
3029      * @param TransB The type of transpose applied to matrix B.
3030      * @param alpha The scalar alpha.
3031      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3032      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3033      * @param beta The scalar beta.
3034      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3035      */
CGEMM(@ranspose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3036     public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A,
3037                       Allocation B, Float2 beta, Allocation C) {
3038         validateTranspose(TransA);
3039         validateTranspose(TransB);
3040         validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C);
3041         int M = -1, N = -1, K = -1;
3042         if (TransA != NO_TRANSPOSE) {
3043             M = A.getType().getX();
3044             K = A.getType().getY();
3045         } else {
3046             M = A.getType().getY();
3047             K = A.getType().getX();
3048         }
3049         if (TransB != NO_TRANSPOSE) {
3050             N = B.getType().getY();
3051         } else {
3052             N = B.getType().getX();
3053         }
3054 
3055         boolean mUseIncSupp = isIncSupp();
3056         long aID = A.getID(mRS);
3057         long bID = B.getID(mRS);
3058         long cID = C.getID(mRS);
3059         if (mUseIncSupp) {
3060             aID = getDummyAlloc(A);
3061             bID = getDummyAlloc(B);
3062             cID = getDummyAlloc(C);
3063         }
3064         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, aID, bID,
3065                                          beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3066     }
3067 
3068     /**
3069      * ZGEMM performs one of the matrix-matrix operations
3070      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3071      *
3072      * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
3073      *
3074      * @param TransA The type of transpose applied to matrix A.
3075      * @param TransB The type of transpose applied to matrix B.
3076      * @param alpha The scalar alpha.
3077      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2
3078      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2
3079      * @param beta The scalar beta.
3080      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2
3081      */
ZGEMM(@ranspose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3082     public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A,
3083                       Allocation B, Double2 beta, Allocation C) {
3084         validateTranspose(TransA);
3085         validateTranspose(TransB);
3086         validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C);
3087         int M = -1, N = -1, K = -1;
3088         if (TransA != NO_TRANSPOSE) {
3089             M = A.getType().getX();
3090             K = A.getType().getY();
3091         } else {
3092             M = A.getType().getY();
3093             K = A.getType().getX();
3094         }
3095         if (TransB != NO_TRANSPOSE) {
3096             N = B.getType().getY();
3097         } else {
3098             N = B.getType().getX();
3099         }
3100 
3101         boolean mUseIncSupp = isIncSupp();
3102         long aID = A.getID(mRS);
3103         long bID = B.getID(mRS);
3104         long cID = C.getID(mRS);
3105         if (mUseIncSupp) {
3106             aID = getDummyAlloc(A);
3107             bID = getDummyAlloc(B);
3108             cID = getDummyAlloc(C);
3109         }
3110         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, aID, bID,
3111                                    beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3112     }
3113 
3114     /**
3115      * SSYMM performs one of the matrix-matrix operations
3116      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3117      *
3118      * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
3119      *
3120      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3121      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3122      * @param alpha The scalar alpha.
3123      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3124      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3125      * @param beta The scalar beta.
3126      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
3127      */
SSYMM(@ide int Side, @Uplo int Uplo, float alpha, Allocation A, Allocation B, float beta, Allocation C)3128     public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A,
3129                       Allocation B, float beta, Allocation C) {
3130         validateSide(Side);
3131         validateUplo(Uplo);
3132         //For SYMM, Matrix A should be symmetric
3133         if (A.getType().getX() != A.getType().getY()) {
3134             throw new RSRuntimeException("Matrix A is not symmetric");
3135         }
3136         validateL3(Element.F32(mRS), 0, 0, Side, A, B, C);
3137 
3138         boolean mUseIncSupp = isIncSupp();
3139         long aID = A.getID(mRS);
3140         long bID = B.getID(mRS);
3141         long cID = C.getID(mRS);
3142         if (mUseIncSupp) {
3143             aID = getDummyAlloc(A);
3144             bID = getDummyAlloc(B);
3145             cID = getDummyAlloc(C);
3146         }
3147         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID,
3148                                         beta, cID, 0, 0, 0, 0, mUseIncSupp);
3149     }
3150 
3151     /**
3152      * DSYMM performs one of the matrix-matrix operations
3153      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3154      *
3155      * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
3156      *
3157      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3158      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3159      * @param alpha The scalar alpha.
3160      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3161      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3162      * @param beta The scalar beta.
3163      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
3164      */
DSYMM(@ide int Side, @Uplo int Uplo, double alpha, Allocation A, Allocation B, double beta, Allocation C)3165     public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A,
3166                       Allocation B, double beta, Allocation C) {
3167         validateSide(Side);
3168         validateUplo(Uplo);
3169         if (A.getType().getX() != A.getType().getY()) {
3170             throw new RSRuntimeException("Matrix A is not symmetric");
3171         }
3172         validateL3(Element.F64(mRS), 0, 0, Side, A, B, C);
3173 
3174         boolean mUseIncSupp = isIncSupp();
3175         long aID = A.getID(mRS);
3176         long bID = B.getID(mRS);
3177         long cID = C.getID(mRS);
3178         if (mUseIncSupp) {
3179             aID = getDummyAlloc(A);
3180             bID = getDummyAlloc(B);
3181             cID = getDummyAlloc(C);
3182         }
3183         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID,
3184                                         beta, cID, 0, 0, 0, 0, mUseIncSupp);
3185     }
3186 
3187     /**
3188      * CSYMM performs one of the matrix-matrix operations
3189      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3190      *
3191      * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
3192      *
3193      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3194      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3195      * @param alpha The scalar alpha.
3196      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3197      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3198      * @param beta The scalar beta.
3199      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3200      */
CSYMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3201     public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A,
3202                       Allocation B, Float2 beta, Allocation C) {
3203         validateSide(Side);
3204         validateUplo(Uplo);
3205         if (A.getType().getX() != A.getType().getY()) {
3206             throw new RSRuntimeException("Matrix A is not symmetric");
3207         }
3208         validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C);
3209 
3210         boolean mUseIncSupp = isIncSupp();
3211         long aID = A.getID(mRS);
3212         long bID = B.getID(mRS);
3213         long cID = C.getID(mRS);
3214         if (mUseIncSupp) {
3215             aID = getDummyAlloc(A);
3216             bID = getDummyAlloc(B);
3217             cID = getDummyAlloc(C);
3218         }
3219         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID,
3220                                          beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3221     }
3222 
3223     /**
3224      * ZSYMM performs one of the matrix-matrix operations
3225      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3226      *
3227      * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
3228      *
3229      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3230      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3231      * @param alpha The scalar alpha.
3232      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3233      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3234      * @param beta The scalar beta.
3235      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3236      */
ZSYMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3237     public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A,
3238                       Allocation B, Double2 beta, Allocation C) {
3239         validateSide(Side);
3240         validateUplo(Uplo);
3241         if (A.getType().getX() != A.getType().getY()) {
3242             throw new RSRuntimeException("Matrix A is not symmetric");
3243         }
3244         validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C);
3245 
3246         boolean mUseIncSupp = isIncSupp();
3247         long aID = A.getID(mRS);
3248         long bID = B.getID(mRS);
3249         long cID = C.getID(mRS);
3250         if (mUseIncSupp) {
3251             aID = getDummyAlloc(A);
3252             bID = getDummyAlloc(B);
3253             cID = getDummyAlloc(C);
3254         }
3255         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID,
3256                                    beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3257     }
3258 
3259     /**
3260      * SSYRK performs one of the symmetric rank k operations
3261      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3262      *
3263      * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
3264      *
3265      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3266      * @param Trans The type of transpose applied to the operation.
3267      * @param alpha The scalar alpha.
3268      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3269      * @param beta The scalar beta.
3270      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
3271      */
SSYRK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)3272     public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
3273         validateTranspose(Trans);
3274         validateUplo(Uplo);
3275         validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C);
3276         int K = -1;
3277         if (Trans != NO_TRANSPOSE) {
3278             K = A.getType().getY();
3279         } else {
3280             K = A.getType().getX();
3281         }
3282 
3283         boolean mUseIncSupp = isIncSupp();
3284         long aID = A.getID(mRS);
3285         long cID = C.getID(mRS);
3286         if (mUseIncSupp) {
3287             aID = getDummyAlloc(A);
3288             cID = getDummyAlloc(C);
3289         }
3290         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3291     }
3292 
3293     /**
3294      * DSYRK performs one of the symmetric rank k operations
3295      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3296      *
3297      * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
3298      *
3299      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3300      * @param Trans The type of transpose applied to the operation.
3301      * @param alpha The scalar alpha.
3302      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3303      * @param beta The scalar beta.
3304      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
3305      */
DSYRK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)3306     public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
3307         validateTranspose(Trans);
3308         validateUplo(Uplo);
3309         validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C);
3310         int K = -1;
3311         if (Trans != NO_TRANSPOSE) {
3312             K = A.getType().getY();
3313         } else {
3314             K = A.getType().getX();
3315         }
3316 
3317         boolean mUseIncSupp = isIncSupp();
3318         long aID = A.getID(mRS);
3319         long cID = C.getID(mRS);
3320         if (mUseIncSupp) {
3321             aID = getDummyAlloc(A);
3322             cID = getDummyAlloc(C);
3323         }
3324         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3325     }
3326 
3327     /**
3328      * CSYRK performs one of the symmetric rank k operations
3329      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3330      *
3331      * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
3332      *
3333      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3334      * @param Trans The type of transpose applied to the operation.
3335      * @param alpha The scalar alpha.
3336      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3337      * @param beta The scalar beta.
3338      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3339      */
CSYRK(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C)3340     public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) {
3341         validateTranspose(Trans);
3342         validateUplo(Uplo);
3343         validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C);
3344         int K = -1;
3345         if (Trans != NO_TRANSPOSE) {
3346             K = A.getType().getY();
3347         } else {
3348             K = A.getType().getX();
3349         }
3350 
3351         boolean mUseIncSupp = isIncSupp();
3352         long aID = A.getID(mRS);
3353         long cID = C.getID(mRS);
3354         if (mUseIncSupp) {
3355             aID = getDummyAlloc(A);
3356             cID = getDummyAlloc(C);
3357         }
3358         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y,
3359                                          C.getID(mRS), 0, 0, 0, 0, mUseIncSupp);
3360     }
3361 
3362     /**
3363      * ZSYRK performs one of the symmetric rank k operations
3364      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3365      *
3366      * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
3367      *
3368      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3369      * @param Trans The type of transpose applied to the operation.
3370      * @param alpha The scalar alpha.
3371      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3372      * @param beta The scalar beta.
3373      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3374      */
ZSYRK(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C)3375     public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) {
3376         validateTranspose(Trans);
3377         validateUplo(Uplo);
3378         validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C);
3379         int K = -1;
3380         if (Trans != NO_TRANSPOSE) {
3381             K = A.getType().getY();
3382         } else {
3383             K = A.getType().getX();
3384         }
3385 
3386         boolean mUseIncSupp = isIncSupp();
3387         long aID = A.getID(mRS);
3388         long cID = C.getID(mRS);
3389         if (mUseIncSupp) {
3390             aID = getDummyAlloc(A);
3391             cID = getDummyAlloc(C);
3392         }
3393         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y,
3394                                    C.getID(mRS), 0, 0, 0, 0, mUseIncSupp);
3395     }
3396 
validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)3397     static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
3398         validateTranspose(Trans);
3399         if (!A.getType().getElement().isCompatible(e) ||
3400             !B.getType().getElement().isCompatible(e) ||
3401             !C.getType().getElement().isCompatible(e)) {
3402             throw new RSRuntimeException("Called BLAS with wrong Element type");
3403         }
3404         int Cdim = -1;
3405         // A is n x k if no transpose, k x n if transpose
3406         // C is n x n
3407         if (Trans == TRANSPOSE) {
3408             // check columns versus C
3409             Cdim = A.getType().getX();
3410         } else {
3411             // check rows versus C
3412             Cdim = A.getType().getY();
3413         }
3414         if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {
3415             throw new RSRuntimeException("Invalid symmetric matrix in SYR2K");
3416         }
3417         // A dims == B dims
3418         if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
3419             throw new RSRuntimeException("Invalid A and B in SYR2K");
3420         }
3421     }
3422 
3423     /**
3424      * SSYR2K performs one of the symmetric rank 2k operations
3425      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3426      *
3427      * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
3428      *
3429      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3430      * @param Trans The type of transpose applied to the operation.
3431      * @param alpha The scalar alpha.
3432      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3433      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3434      * @param beta The scalar beta.
3435      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
3436      */
SSYR2K(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C)3437     public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) {
3438         validateUplo(Uplo);
3439         validateSYR2K(Element.F32(mRS), Trans, A, B, C);
3440         int K = -1;
3441         if (Trans != NO_TRANSPOSE) {
3442             K = A.getType().getY();
3443         } else {
3444             K = A.getType().getX();
3445         }
3446 
3447         boolean mUseIncSupp = isIncSupp();
3448         long aID = A.getID(mRS);
3449         long bID = B.getID(mRS);
3450         long cID = C.getID(mRS);
3451         if (mUseIncSupp) {
3452             aID = getDummyAlloc(A);
3453             bID = getDummyAlloc(B);
3454             cID = getDummyAlloc(C);
3455         }
3456         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3457     }
3458 
3459     /**
3460      * DSYR2K performs one of the symmetric rank 2k operations
3461      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3462      *
3463      * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
3464      *
3465      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3466      * @param Trans The type of transpose applied to the operation.
3467      * @param alpha The scalar alpha.
3468      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3469      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3470      * @param beta The scalar beta.
3471      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
3472      */
DSYR2K(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C)3473     public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) {
3474         validateUplo(Uplo);
3475         validateSYR2K(Element.F64(mRS), Trans, A, B, C);
3476         int K = -1;
3477         if (Trans != NO_TRANSPOSE) {
3478             K = A.getType().getY();
3479         } else {
3480             K = A.getType().getX();
3481         }
3482 
3483         boolean mUseIncSupp = isIncSupp();
3484         long aID = A.getID(mRS);
3485         long bID = B.getID(mRS);
3486         long cID = C.getID(mRS);
3487         if (mUseIncSupp) {
3488             aID = getDummyAlloc(A);
3489             bID = getDummyAlloc(B);
3490             cID = getDummyAlloc(C);
3491         }
3492         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3493     }
3494 
3495     /**
3496      * CSYR2K performs one of the symmetric rank 2k operations
3497      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3498      *
3499      * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
3500      *
3501      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3502      * @param Trans The type of transpose applied to the operation.
3503      * @param alpha The scalar alpha.
3504      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3505      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3506      * @param beta The scalar beta.
3507      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3508      */
CSYR2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3509     public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
3510         validateUplo(Uplo);
3511         validateSYR2K(Element.F32_2(mRS), Trans, A, B, C);
3512         int K = -1;
3513         if (Trans != NO_TRANSPOSE) {
3514             K = A.getType().getY();
3515         } else {
3516             K = A.getType().getX();
3517         }
3518 
3519         boolean mUseIncSupp = isIncSupp();
3520         long aID = A.getID(mRS);
3521         long bID = B.getID(mRS);
3522         long cID = C.getID(mRS);
3523         if (mUseIncSupp) {
3524             aID = getDummyAlloc(A);
3525             bID = getDummyAlloc(B);
3526             cID = getDummyAlloc(C);
3527         }
3528         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3529     }
3530 
3531     /**
3532      * ZSYR2K performs one of the symmetric rank 2k operations
3533      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3534      *
3535      * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
3536      *
3537      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3538      * @param Trans The type of transpose applied to the operation.
3539      * @param alpha The scalar alpha.
3540      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3541      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3542      * @param beta The scalar beta.
3543      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3544      */
ZSYR2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3545     public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
3546         validateUplo(Uplo);
3547         validateSYR2K(Element.F64_2(mRS), Trans, A, B, C);
3548         int K = -1;
3549         if (Trans != NO_TRANSPOSE) {
3550             K = A.getType().getY();
3551         } else {
3552             K = A.getType().getX();
3553         }
3554 
3555         boolean mUseIncSupp = isIncSupp();
3556         long aID = A.getID(mRS);
3557         long bID = B.getID(mRS);
3558         long cID = C.getID(mRS);
3559         if (mUseIncSupp) {
3560             aID = getDummyAlloc(A);
3561             bID = getDummyAlloc(B);
3562             cID = getDummyAlloc(C);
3563         }
3564         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3565     }
3566 
validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)3567     static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
3568         validateSide(Side);
3569         validateTranspose(TransA);
3570         int aM = -1, aN = -1, bM = -1, bN = -1;
3571         if (!A.getType().getElement().isCompatible(e) ||
3572             !B.getType().getElement().isCompatible(e)) {
3573             throw new RSRuntimeException("Called BLAS with wrong Element type");
3574         }
3575 
3576         aM = A.getType().getY();
3577         aN = A.getType().getX();
3578         if (aM != aN) {
3579             throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A");
3580         }
3581 
3582         bM = B.getType().getY();
3583         bN = B.getType().getX();
3584         if (Side == LEFT) {
3585             if (aN != bM) {
3586                 throw new RSRuntimeException("Called TRMM with invalid matrices");
3587             }
3588         } else {
3589             if (bN != aM) {
3590                 throw new RSRuntimeException("Called TRMM with invalid matrices");
3591             }
3592         }
3593     }
3594 
3595     /**
3596      * STRMM performs one of the matrix-matrix operations
3597      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3598      * op(A) is one of  op(A) = A  or  op(A) = A**T
3599      *
3600      * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
3601      *
3602      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3603      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3604      * @param TransA The type of transpose applied to matrix A.
3605      * @param Diag Specifies whether or not A is unit triangular.
3606      * @param alpha The scalar alpha.
3607      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3608      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3609      */
STRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)3610     public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
3611         validateUplo(Uplo);
3612         validateDiag(Diag);
3613         validateTRMM(Element.F32(mRS), Side, TransA, A, B);
3614 
3615         boolean mUseIncSupp = isIncSupp();
3616         long aID = A.getID(mRS);
3617         long bID = B.getID(mRS);
3618         if (mUseIncSupp) {
3619             aID = getDummyAlloc(A);
3620             bID = getDummyAlloc(B);
3621         }
3622         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3623                                         alpha, aID, bID, 0.f, 0, 0, 0, 0, 0, mUseIncSupp);
3624     }
3625 
3626     /**
3627      * DTRMM performs one of the matrix-matrix operations
3628      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3629      * op(A) is one of  op(A) = A  or  op(A) = A**T
3630      *
3631      * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
3632      *
3633      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3634      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3635      * @param TransA The type of transpose applied to matrix A.
3636      * @param Diag Specifies whether or not A is unit triangular.
3637      * @param alpha The scalar alpha.
3638      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3639      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3640      */
DTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)3641     public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
3642         validateUplo(Uplo);
3643         validateDiag(Diag);
3644         validateTRMM(Element.F64(mRS), Side, TransA, A, B);
3645 
3646         boolean mUseIncSupp = isIncSupp();
3647         long aID = A.getID(mRS);
3648         long bID = B.getID(mRS);
3649         if (mUseIncSupp) {
3650             aID = getDummyAlloc(A);
3651             bID = getDummyAlloc(B);
3652         }
3653         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3654                                         alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3655     }
3656 
3657     /**
3658      * CTRMM performs one of the matrix-matrix operations
3659      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3660      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3661      *
3662      * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
3663      *
3664      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3665      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3666      * @param TransA The type of transpose applied to matrix A.
3667      * @param Diag Specifies whether or not A is unit triangular.
3668      * @param alpha The scalar alpha.
3669      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3670      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3671      */
CTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)3672     public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
3673         validateUplo(Uplo);
3674         validateDiag(Diag);
3675         validateTRMM(Element.F32_2(mRS), Side, TransA, A, B);
3676 
3677         boolean mUseIncSupp = isIncSupp();
3678         long aID = A.getID(mRS);
3679         long bID = B.getID(mRS);
3680         if (mUseIncSupp) {
3681             aID = getDummyAlloc(A);
3682             bID = getDummyAlloc(B);
3683         }
3684         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3685                                          alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3686     }
3687 
3688     /**
3689      * ZTRMM performs one of the matrix-matrix operations
3690      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3691      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3692      *
3693      * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
3694      *
3695      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3696      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3697      * @param TransA The type of transpose applied to matrix A.
3698      * @param Diag Specifies whether or not A is unit triangular.
3699      * @param alpha The scalar alpha.
3700      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3701      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3702      */
ZTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)3703     public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
3704         validateUplo(Uplo);
3705         validateDiag(Diag);
3706         validateTRMM(Element.F64_2(mRS), Side, TransA, A, B);
3707 
3708         boolean mUseIncSupp = isIncSupp();
3709         long aID = A.getID(mRS);
3710         long bID = B.getID(mRS);
3711         if (mUseIncSupp) {
3712             aID = getDummyAlloc(A);
3713             bID = getDummyAlloc(B);
3714         }
3715         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3716                                    alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3717     }
3718 
validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)3719     static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
3720         int adim = -1, bM = -1, bN = -1;
3721         validateSide(Side);
3722         validateTranspose(TransA);
3723         if (!A.getType().getElement().isCompatible(e) ||
3724             !B.getType().getElement().isCompatible(e)) {
3725             throw new RSRuntimeException("Called BLAS with wrong Element type");
3726         }
3727         adim = A.getType().getX();
3728         if (adim != A.getType().getY()) {
3729             // this may be unnecessary, the restriction could potentially be relaxed
3730             // A needs to contain at least that symmetric matrix but could theoretically be larger
3731             // for now we assume adapters are sufficient, will reevaluate in the future
3732             throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A");
3733         }
3734         bM = B.getType().getY();
3735         bN = B.getType().getX();
3736         if (Side == LEFT) {
3737             // A is M*M
3738             if (adim != bM) {
3739                 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
3740             }
3741         } else {
3742             // A is N*N
3743             if (adim != bN) {
3744                 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
3745             }
3746         }
3747     }
3748 
3749     /**
3750      * STRSM solves one of the matrix equations
3751      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3752      * op(A) is one of  op(A) = A  or  op(A) = A**T
3753      *
3754      * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
3755      *
3756      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3757      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3758      * @param TransA The type of transpose applied to matrix A.
3759      * @param Diag Specifies whether or not A is unit triangular.
3760      * @param alpha The scalar alpha.
3761      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3762      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3763      */
STRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)3764     public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
3765         validateUplo(Uplo);
3766         validateDiag(Diag);
3767         validateTRSM(Element.F32(mRS), Side, TransA, A, B);
3768 
3769         boolean mUseIncSupp = isIncSupp();
3770         long aID = A.getID(mRS);
3771         long bID = B.getID(mRS);
3772         if (mUseIncSupp) {
3773             aID = getDummyAlloc(A);
3774             bID = getDummyAlloc(B);
3775         }
3776         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3777                                         alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3778     }
3779 
3780     /**
3781      * DTRSM solves one of the matrix equations
3782      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3783      * op(A) is one of  op(A) = A  or  op(A) = A**T
3784      *
3785      * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
3786      *
3787      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3788      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3789      * @param TransA The type of transpose applied to matrix A.
3790      * @param Diag Specifies whether or not A is unit triangular.
3791      * @param alpha The scalar alpha.
3792      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3793      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3794      */
DTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)3795     public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
3796         validateUplo(Uplo);
3797         validateDiag(Diag);
3798         validateTRSM(Element.F64(mRS), Side, TransA, A, B);
3799 
3800         boolean mUseIncSupp = isIncSupp();
3801         long aID = A.getID(mRS);
3802         long bID = B.getID(mRS);
3803         if (mUseIncSupp) {
3804             aID = getDummyAlloc(A);
3805             bID = getDummyAlloc(B);
3806         }
3807         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3808                                         alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3809     }
3810 
3811     /**
3812      * CTRSM solves one of the matrix equations
3813      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3814      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3815      *
3816      * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3817      *
3818      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3819      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3820      * @param TransA The type of transpose applied to matrix A.
3821      * @param Diag Specifies whether or not A is unit triangular.
3822      * @param alpha The scalar alpha.
3823      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3824      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3825      */
CTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)3826     public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
3827         validateUplo(Uplo);
3828         validateDiag(Diag);
3829         validateTRSM(Element.F32_2(mRS), Side, TransA, A, B);
3830 
3831         boolean mUseIncSupp = isIncSupp();
3832         long aID = A.getID(mRS);
3833         long bID = B.getID(mRS);
3834         if (mUseIncSupp) {
3835             aID = getDummyAlloc(A);
3836             bID = getDummyAlloc(B);
3837         }
3838         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3839                                          alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3840     }
3841 
3842     /**
3843      * ZTRSM solves one of the matrix equations
3844      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3845      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3846      *
3847      * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3848      *
3849      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3850      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3851      * @param TransA The type of transpose applied to matrix A.
3852      * @param Diag Specifies whether or not A is unit triangular.
3853      * @param alpha The scalar alpha.
3854      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3855      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3856      */
ZTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)3857     public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
3858         validateUplo(Uplo);
3859         validateDiag(Diag);
3860         validateTRSM(Element.F64_2(mRS), Side, TransA, A, B);
3861 
3862         boolean mUseIncSupp = isIncSupp();
3863         long aID = A.getID(mRS);
3864         long bID = B.getID(mRS);
3865         if (mUseIncSupp) {
3866             aID = getDummyAlloc(A);
3867             bID = getDummyAlloc(B);
3868         }
3869         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3870                                    alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3871     }
3872 
validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C)3873     static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) {
3874         validateSide(Side);
3875 
3876         if (!A.getType().getElement().isCompatible(e) ||
3877             !B.getType().getElement().isCompatible(e) ||
3878             !C.getType().getElement().isCompatible(e)) {
3879             throw new RSRuntimeException("Called BLAS with wrong Element type");
3880         }
3881 
3882         // A must be square; can potentially be relaxed similar to TRSM
3883         int adim = A.getType().getX();
3884         if (adim != A.getType().getY()) {
3885             throw new RSRuntimeException("Called HEMM with non-square A");
3886         }
3887         if ((Side == LEFT && adim != B.getType().getY()) ||
3888             (Side == RIGHT && adim != B.getType().getX())) {
3889             throw new RSRuntimeException("Called HEMM with invalid B");
3890         }
3891         if (B.getType().getX() != C.getType().getX() ||
3892             B.getType().getY() != C.getType().getY()) {
3893             throw new RSRuntimeException("Called HEMM with mismatched B and C");
3894         }
3895     }
3896 
3897     /**
3898      * CHEMM performs one of the matrix-matrix operations
3899      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3900      *
3901      * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3902      *
3903      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3904      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3905      * @param alpha The scalar alpha.
3906      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3907      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3908      * @param beta The scalar beta.
3909      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3910      */
CHEMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3911     public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
3912         validateUplo(Uplo);
3913         validateHEMM(Element.F32_2(mRS), Side, A, B, C);
3914 
3915         boolean mUseIncSupp = isIncSupp();
3916         long aID = A.getID(mRS);
3917         long bID = B.getID(mRS);
3918         long cID = C.getID(mRS);
3919         if (mUseIncSupp) {
3920             aID = getDummyAlloc(A);
3921             bID = getDummyAlloc(B);
3922             cID = getDummyAlloc(C);
3923         }
3924         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
3925                                          alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3926     }
3927 
3928     /**
3929      * ZHEMM performs one of the matrix-matrix operations
3930      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3931      *
3932      * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3933      *
3934      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3935      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3936      * @param alpha The scalar alpha.
3937      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3938      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3939      * @param beta The scalar beta.
3940      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3941      */
ZHEMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3942     public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
3943         validateUplo(Uplo);
3944         validateHEMM(Element.F64_2(mRS), Side, A, B, C);
3945 
3946         boolean mUseIncSupp = isIncSupp();
3947         long aID = A.getID(mRS);
3948         long bID = B.getID(mRS);
3949         long cID = C.getID(mRS);
3950         if (mUseIncSupp) {
3951             aID = getDummyAlloc(A);
3952             bID = getDummyAlloc(B);
3953             cID = getDummyAlloc(C);
3954         }
3955         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
3956                                    alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3957     }
3958 
validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C)3959     static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) {
3960         if (!A.getType().getElement().isCompatible(e) ||
3961             !C.getType().getElement().isCompatible(e)) {
3962             throw new RSRuntimeException("Called BLAS with wrong Element type");
3963         }
3964         validateConjTranspose(Trans);
3965         int cdim = C.getType().getX();
3966         if (cdim != C.getType().getY()) {
3967             throw new RSRuntimeException("Called HERK with non-square C");
3968         }
3969         if (Trans == NO_TRANSPOSE) {
3970             if (cdim != A.getType().getY()) {
3971                 throw new RSRuntimeException("Called HERK with invalid A");
3972             }
3973         } else {
3974             if (cdim != A.getType().getX()) {
3975                 throw new RSRuntimeException("Called HERK with invalid A");
3976             }
3977         }
3978     }
3979 
3980     /**
3981      * CHERK performs one of the hermitian rank k operations
3982      * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3983      *
3984      * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3985      *
3986      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3987      * @param Trans The type of transpose applied to the operation.
3988      * @param alpha The scalar alpha.
3989      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3990      * @param beta The scalar beta.
3991      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3992      */
CHERK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)3993     public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
3994         validateUplo(Uplo);
3995         validateHERK(Element.F32_2(mRS), Trans, A, C);
3996         int k = 0;
3997         if (Trans == CONJ_TRANSPOSE) {
3998             k = A.getType().getY();
3999         } else {
4000             k = A.getType().getX();
4001         }
4002 
4003         boolean mUseIncSupp = isIncSupp();
4004         long aID = A.getID(mRS);
4005         long cID = C.getID(mRS);
4006         if (mUseIncSupp) {
4007             aID = getDummyAlloc(A);
4008             cID = getDummyAlloc(C);
4009         }
4010         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
4011                                          alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4012     }
4013 
4014     /**
4015      * ZHERK performs one of the hermitian rank k operations
4016      * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
4017      *
4018      * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
4019      *
4020      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
4021      * @param Trans The type of transpose applied to the operation.
4022      * @param alpha The scalar alpha.
4023      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
4024      * @param beta The scalar beta.
4025      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
4026      */
ZHERK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)4027     public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
4028         validateUplo(Uplo);
4029         validateHERK(Element.F64_2(mRS), Trans, A, C);
4030         int k = 0;
4031         if (Trans == CONJ_TRANSPOSE) {
4032             k = A.getType().getY();
4033         } else {
4034             k = A.getType().getX();
4035         }
4036 
4037         boolean mUseIncSupp = isIncSupp();
4038         long aID = A.getID(mRS);
4039         long cID = C.getID(mRS);
4040         if (mUseIncSupp) {
4041             aID = getDummyAlloc(A);
4042             cID = getDummyAlloc(C);
4043         }
4044         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
4045                                    alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4046     }
4047 
validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)4048     static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
4049         if (!A.getType().getElement().isCompatible(e) ||
4050             !B.getType().getElement().isCompatible(e) ||
4051             !C.getType().getElement().isCompatible(e)) {
4052             throw new RSRuntimeException("Called BLAS with wrong Element type");
4053         }
4054         validateConjTranspose(Trans);
4055         int cdim = C.getType().getX();
4056         if (cdim != C.getType().getY()) {
4057             throw new RSRuntimeException("Called HER2K with non-square C");
4058         }
4059         if (Trans == NO_TRANSPOSE) {
4060             if (A.getType().getY() != cdim) {
4061                 throw new RSRuntimeException("Called HER2K with invalid matrices");
4062             }
4063         } else {
4064             if (A.getType().getX() != cdim) {
4065                 throw new RSRuntimeException("Called HER2K with invalid matrices");
4066             }
4067         }
4068         if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
4069             throw new RSRuntimeException("Called HER2K with invalid A and B matrices");
4070         }
4071     }
4072 
4073     /**
4074      * CHER2K performs one of the hermitian rank 2k operations
4075      * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
4076      *
4077      * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
4078      *
4079      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
4080      * @param Trans The type of transpose applied to the operation.
4081      * @param alpha The scalar alpha.
4082      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
4083      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
4084      * @param beta The scalar beta.
4085      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
4086      */
CHER2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C)4087     public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) {
4088         validateUplo(Uplo);
4089         validateHER2K(Element.F32_2(mRS), Trans, A, B, C);
4090         int k = 0;
4091         if (Trans == NO_TRANSPOSE) {
4092             k = A.getType().getX();
4093         } else {
4094             k = A.getType().getY();
4095         }
4096 
4097         boolean mUseIncSupp = isIncSupp();
4098         long aID = A.getID(mRS);
4099         long bID = B.getID(mRS);
4100         long cID = C.getID(mRS);
4101         if (mUseIncSupp) {
4102             aID = getDummyAlloc(A);
4103             bID = getDummyAlloc(B);
4104             cID = getDummyAlloc(C);
4105         }
4106         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
4107                                          A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4108     }
4109 
4110     /**
4111      * ZHER2K performs one of the hermitian rank 2k operations
4112      * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
4113      *
4114      * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
4115      *
4116      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
4117      * @param Trans The type of transpose applied to the operation.
4118      * @param alpha The scalar alpha.
4119      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
4120      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
4121      * @param beta The scalar beta.
4122      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
4123      */
ZHER2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C)4124     public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) {
4125         validateUplo(Uplo);
4126         validateHER2K(Element.F64_2(mRS), Trans, A, B, C);
4127         int k = 0;
4128         if (Trans == NO_TRANSPOSE) {
4129             k = A.getType().getX();
4130         } else {
4131             k = A.getType().getY();
4132         }
4133 
4134         boolean mUseIncSupp = isIncSupp();
4135         long aID = A.getID(mRS);
4136         long bID = B.getID(mRS);
4137         long cID = C.getID(mRS);
4138         if (mUseIncSupp) {
4139             aID = getDummyAlloc(A);
4140             bID = getDummyAlloc(B);
4141             cID = getDummyAlloc(C);
4142         }
4143         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
4144                                    A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4145     }
4146 
4147 
4148     /**
4149      * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
4150      * Calculations are done in 1.10.21 fixed-point format for the final output,
4151      * just before there's a shift down to drop the fractional parts. The output
4152      * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
4153      * gives some headroom to avoid wrapping around on small overflows.
4154      *
4155      * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}.
4156      * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
4157      * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}.
4158      * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
4159      * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}.
4160      * @param c_offset The offset for all values in matrix C.
4161      * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult.
4162      **/
BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult)4163     public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) {
4164         validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C);
4165 
4166         if (a_offset < 0 || a_offset > 255) {
4167             throw new RSRuntimeException("Invalid a_offset passed to BNNM");
4168         }
4169         if (b_offset < 0 || b_offset > 255) {
4170             throw new RSRuntimeException("Invalid b_offset passed to BNNM");
4171         }
4172         int M = -1, N = -1, K = -1;
4173         M = A.getType().getY();
4174         N = B.getType().getY();
4175         K = A.getType().getX();
4176 
4177         boolean mUseIncSupp = isIncSupp();
4178         long aID = A.getID(mRS);
4179         long bID = B.getID(mRS);
4180         long cID = C.getID(mRS);
4181         if (mUseIncSupp) {
4182             aID = getDummyAlloc(A);
4183             bID = getDummyAlloc(B);
4184             cID = getDummyAlloc(C);
4185         }
4186         mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, aID, a_offset, bID, b_offset, cID, c_offset, c_mult, mUseIncSupp);
4187 
4188     }
4189 
4190 }
4191