1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.cts.rsblas;
18 
19 import android.renderscript.*;
20 import android.util.Log;
21 import java.util.ArrayList;
22 
23 public class IntrinsicBLAS extends IntrinsicBase {
24     private ScriptIntrinsicBLAS mBLAS;
25     private BLASData mBLASData;
26     private boolean mInitialized = false;
27 
28     private ArrayList<Allocation> mMatrixS;
29     private final float alphaS = 1.0f;
30     private final float betaS = 1.0f;
31 
32     private ArrayList<Allocation> mMatrixD;
33     private final double alphaD = 1.0;
34     private final double betaD = 1.0;
35 
36     private ArrayList<Allocation> mMatrixC;
37     private final Float2 alphaC = new Float2(1.0f, 0.0f);
38     private final Float2 betaC = new Float2(1.0f, 0.0f);
39 
40     private ArrayList<Allocation> mMatrixZ;
41     private final Double2 alphaZ = new Double2(1.0, 0.0);
42     private final Double2 betaZ = new Double2(1.0, 0.0);
43 
44     private int[] mTranspose = {ScriptIntrinsicBLAS.NO_TRANSPOSE,
45                                 ScriptIntrinsicBLAS.TRANSPOSE,
46                                 ScriptIntrinsicBLAS.CONJ_TRANSPOSE,
47                                 0};
48 
49     private int[] mUplo = {ScriptIntrinsicBLAS.UPPER,
50                            ScriptIntrinsicBLAS.LOWER,
51                            0};
52 
53     private int[] mDiag = {ScriptIntrinsicBLAS.NON_UNIT,
54                            ScriptIntrinsicBLAS.UNIT,
55                            0};
56 
57     private int[] mSide = {ScriptIntrinsicBLAS.LEFT,
58                            ScriptIntrinsicBLAS.RIGHT,
59                            0};
60 
61     private int[] mInc = {0, 1, 2};
62     private int[] mK = {-1, 0, 1};
63     private int[] mDim = {1, 2, 3, 256};
64 
65     @Override
setUp()66     protected void setUp() throws Exception {
67         super.setUp();
68 
69         // Now populate the test Matrixes and Vectors.
70         if (!mInitialized) {
71             mBLASData = new BLASData();
72             mBLASData.loadData(mCtx);
73             mBLAS = ScriptIntrinsicBLAS.create(mRS);
74             mMatrixS = new ArrayList<Allocation>();
75             mMatrixD = new ArrayList<Allocation>();
76             mMatrixC = new ArrayList<Allocation>();
77             mMatrixZ = new ArrayList<Allocation>();
78             for (int x : mDim) {
79                 for (int y : mDim) {
80                     mMatrixS.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), x, y)));
81                     mMatrixD.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), x, y)));
82                     mMatrixC.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), x, y)));
83                     mMatrixZ.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), x, y)));
84                 }
85             }
86             // Also need Allocation with mismatch Element.
87             Allocation misAlloc = Allocation.createTyped(mRS, Type.createXY(mRS, Element.U8(mRS), 1, 1));
88             mMatrixS.add(misAlloc);
89             mMatrixD.add(misAlloc);
90             mMatrixC.add(misAlloc);
91             mMatrixZ.add(misAlloc);
92             mInitialized = true;
93         }
94     }
95 
96     @Override
tearDown()97     protected void tearDown() throws Exception {
98         super.tearDown();
99     }
100 
101     // Calculate the square of the L2 norm of a matrix.
calcL2Norm(float[] input)102     private double calcL2Norm(float[] input) {
103         double l2Norm = 0;
104         for (int i = 0; i < input.length; ++i) {
105             l2Norm += input[i] * input[i];
106         }
107         return l2Norm;
108     }
109 
calcL2Norm(double[] input)110     private double calcL2Norm(double[] input) {
111         double l2Norm = 0;
112         for (int i = 0; i < input.length; ++i) {
113             l2Norm += input[i] * input[i];
114         }
115         return l2Norm;
116     }
117 
118     // Routine to verify if matrix are equivalent.
verifyMatrix(Allocation ref, Allocation out)119     private void verifyMatrix(Allocation ref, Allocation out) {
120         verifyMatrix(ref, out, false);
121     }
122 
123     // Use L2 norm of a matrix as the scale to determine whether two matrices are equivalent:
124     // if the absolute square error of any elements is smaller than the average L2 Norm
125     // per element times an allowed error range (1e-6), then the two matrices are considered equivalent.
126     // Criterion: (a[i,j] - a'[i,j])^2 < epsilon * ||A||/(M*N)
127     // M, N: the dimensions of the matrix; epsilon: allowed relative error.
verifyMatrix(Allocation ref, Allocation out, boolean isUpperMatrix)128     private void verifyMatrix(Allocation ref, Allocation out, boolean isUpperMatrix) {
129         double l2Norm;
130         int size;
131         Element e = ref.getType().getElement();
132         if (e.isCompatible(Element.F32(mRS)) || e.isCompatible(Element.F32_2(mRS))) {
133             size = out.getBytesSize() / 4;
134             float[] outArr = new float[size];
135             float[] refArr = new float[size];
136             out.copyTo(outArr);
137             ref.copyTo(refArr);
138 
139             double l2NormOut = calcL2Norm(outArr);
140             double l2NormRef = calcL2Norm(refArr);
141             l2Norm = (l2NormOut < l2NormRef ? l2NormOut : l2NormRef) / size;
142         } else {
143             size = out.getBytesSize() / 8;
144             double[] outArr = new double[size];
145             double[] refArr = new double[size];
146             out.copyTo(outArr);
147             ref.copyTo(refArr);
148 
149             double l2NormOut = calcL2Norm(outArr);
150             double l2NormRef = calcL2Norm(refArr);
151             l2Norm = (l2NormOut < l2NormRef ? l2NormOut : l2NormRef) / size;
152         }
153         mVerify.invoke_verifyMatrix(ref, out, l2Norm, isUpperMatrix);
154     }
155 
156 
validateSide(int Side)157     private boolean validateSide(int Side) {
158         if (Side != ScriptIntrinsicBLAS.LEFT && Side != ScriptIntrinsicBLAS.RIGHT) {
159             return false;
160         }
161         return true;
162     }
163 
validateTranspose(int Trans)164     private boolean validateTranspose(int Trans) {
165         if (Trans != ScriptIntrinsicBLAS.NO_TRANSPOSE &&
166             Trans != ScriptIntrinsicBLAS.TRANSPOSE &&
167             Trans != ScriptIntrinsicBLAS.CONJ_TRANSPOSE) {
168             return false;
169         }
170         return true;
171     }
172 
validateConjTranspose(int Trans)173     private boolean validateConjTranspose(int Trans) {
174         if (Trans != ScriptIntrinsicBLAS.NO_TRANSPOSE &&
175             Trans != ScriptIntrinsicBLAS.CONJ_TRANSPOSE) {
176             return false;
177         }
178         return true;
179     }
180 
validateDiag(int Diag)181     private boolean validateDiag(int Diag) {
182         if (Diag != ScriptIntrinsicBLAS.NON_UNIT &&
183             Diag != ScriptIntrinsicBLAS.UNIT) {
184             return false;
185         }
186         return true;
187     }
188 
validateUplo(int Uplo)189     private boolean validateUplo(int Uplo) {
190         if (Uplo != ScriptIntrinsicBLAS.UPPER &&
191             Uplo != ScriptIntrinsicBLAS.LOWER) {
192             return false;
193         }
194         return true;
195     }
196 
validateVecInput(Allocation X)197     private boolean validateVecInput(Allocation X) {
198         if (X.getType().getY() > 2) {
199             // For testing vector, need a mismatch Y for complete test coverage.
200             return false;
201         }
202         return true;
203     }
204 
validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY)205     private boolean validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
206         if (!validateTranspose(TransA)) {
207             return false;
208         }
209         int M = A.getType().getY();
210         int N = A.getType().getX();
211         if (!A.getType().getElement().isCompatible(e) ||
212             !X.getType().getElement().isCompatible(e) ||
213             !Y.getType().getElement().isCompatible(e)) {
214             return false;
215         }
216         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
217             return false;
218         }
219 
220         if (incX <= 0 || incY <= 0) {
221             return false;
222         }
223         int expectedXDim = -1, expectedYDim = -1;
224         if (TransA == ScriptIntrinsicBLAS.NO_TRANSPOSE) {
225             expectedXDim = 1 + (N - 1) * incX;
226             expectedYDim = 1 + (M - 1) * incY;
227         } else {
228             expectedXDim = 1 + (M - 1) * incX;
229             expectedYDim = 1 + (N - 1) * incY;
230         }
231         if (X.getType().getX() != expectedXDim ||
232             Y.getType().getX() != expectedYDim) {
233             return false;
234         }
235         return true;
236     }
237 
xGEMV_API_test(int trans, int incX, int incY, ArrayList<Allocation> mMatrix)238     private void xGEMV_API_test(int trans, int incX, int incY, ArrayList<Allocation> mMatrix) {
239         for (Allocation matA : mMatrix) {
240             for (Allocation vecX : mMatrix) {
241                 if (!validateVecInput(vecX)) {
242                     continue;
243                 }
244                 for (Allocation vecY : mMatrix) {
245                     if (!validateVecInput(vecY)) {
246                         continue;
247                     }
248                     Element elemA = matA.getType().getElement();
249                     if (validateGEMV(elemA, trans, matA, vecX, incX, vecY, incY)) {
250                         try {
251                             if (elemA.isCompatible(Element.F32(mRS))) {
252                                 mBLAS.SGEMV(trans, alphaS, matA, vecX, incX, betaS, vecY, incY);
253                             } else if (elemA.isCompatible(Element.F64(mRS))) {
254                                 mBLAS.DGEMV(trans, alphaD, matA, vecX, incX, betaD, vecY, incY);
255                             } else if (elemA.isCompatible(Element.F32_2(mRS))) {
256                                 mBLAS.CGEMV(trans, alphaC, matA, vecX, incX, betaC, vecY, incY);
257                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
258                                 mBLAS.ZGEMV(trans, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
259                             }
260                         } catch (RSRuntimeException e) {
261                             fail("should NOT throw RSRuntimeException");
262                         }
263                     } else {
264                         try {
265                             mBLAS.SGEMV(trans, alphaS, matA, vecX, incX, betaS, vecY, incY);
266                             fail("should throw RSRuntimeException for SGEMV");
267                         } catch (RSRuntimeException e) {
268                         }
269                         try {
270                             mBLAS.DGEMV(trans, alphaD, matA, vecX, incX, betaD, vecY, incY);
271                             fail("should throw RSRuntimeException for DGEMV");
272                         } catch (RSRuntimeException e) {
273                         }
274                         try {
275                             mBLAS.CGEMV(trans, alphaC, matA, vecX, incX, betaC, vecY, incY);
276                             fail("should throw RSRuntimeException for CGEMV");
277                         } catch (RSRuntimeException e) {
278                         }
279                         try {
280                             mBLAS.ZGEMV(trans, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
281                             fail("should throw RSRuntimeException for ZGEMV");
282                         } catch (RSRuntimeException e) {
283                         }
284                     }
285                 }
286             }
287         }
288     }
289 
L2_xGEMV_API(ArrayList<Allocation> mMatrix)290     public void L2_xGEMV_API(ArrayList<Allocation> mMatrix) {
291         for (int trans : mTranspose) {
292             for (int incX : mInc) {
293                 xGEMV_API_test(trans, incX, incX, mMatrix);
294             }
295         }
296     }
297 
test_L2_SGEMV_API()298     public void test_L2_SGEMV_API() {
299         L2_xGEMV_API(mMatrixS);
300     }
301 
test_L2_DGEMV_API()302     public void test_L2_DGEMV_API() {
303         L2_xGEMV_API(mMatrixD);
304     }
305 
test_L2_CGEMV_API()306     public void test_L2_CGEMV_API() {
307         L2_xGEMV_API(mMatrixC);
308     }
309 
test_L2_ZGEMV_API()310     public void test_L2_ZGEMV_API() {
311         L2_xGEMV_API(mMatrixZ);
312     }
313 
test_L2_SGEMV_Correctness()314     public void test_L2_SGEMV_Correctness() {
315         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
316         int incX = 1;
317         int incY = 1;
318 
319         // Populate input allocations
320         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
321         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
322         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1));
323         matrixAS.copyFrom(mBLASData.L2_sGEMV_A_mn);
324         vectorXS.copyFrom(mBLASData.L2_sGEMV_x_n1);
325         vectorYS.copyFrom(mBLASData.L2_sGEMV_y_m1);
326 
327         // Test for the default case: NO_TRANS
328         mBLAS.SGEMV(trans, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
329         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1));
330         vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_N);
331         verifyMatrix(vectorYRef, vectorYS);
332 
333         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
334         trans = ScriptIntrinsicBLAS.TRANSPOSE;
335         // Reload vector Y, since it was overwritten by BLAS.
336         vectorYS.copyFrom(mBLASData.L2_sGEMV_y_m1);
337         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
338         mBLAS.SGEMV(trans, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX);
339         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
340         vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_T);
341         verifyMatrix(vectorYRef, vectorXS);
342 
343         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
344         vectorXS.copyFrom(mBLASData.L2_sGEMV_x_n1);
345         mBLAS.SGEMV(trans, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX);
346         vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_H);
347         verifyMatrix(vectorYRef, vectorXS);
348 
349         // Test for incX = 2 & incY = 3;
350         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
351         incX = 2;
352         incY = 3;
353         int dimX = 1 + (mBLASData.dN - 1) * incX;
354         int dimY = 1 + (mBLASData.dM - 1) * incY;
355         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
356         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
357         vectorXS.copyFrom(mBLASData.L2_sGEMV_x_n2);
358         vectorYS.copyFrom(mBLASData.L2_sGEMV_y_m2);
359 
360         mBLAS.SGEMV(trans, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
361         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
362         vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_N2);
363         verifyMatrix(vectorYRef, vectorYS);
364 
365         mRS.finish();
366         checkError();
367     }
368 
test_L2_DGEMV_Correctness()369     public void test_L2_DGEMV_Correctness() {
370         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
371         int incX = 1;
372         int incY = 1;
373 
374         // Populate input allocations
375         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
376         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
377         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1));
378         matrixAD.copyFrom(mBLASData.L2_dGEMV_A_mn);
379         vectorXD.copyFrom(mBLASData.L2_dGEMV_x_n1);
380         vectorYD.copyFrom(mBLASData.L2_dGEMV_y_m1);
381 
382         // Test for the default case: NO_TRANS
383         mBLAS.DGEMV(trans, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
384         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1));
385         vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_N);
386         verifyMatrix(vectorYRef, vectorYD);
387 
388         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
389         trans = ScriptIntrinsicBLAS.TRANSPOSE;
390         // Reload vector Y, since it was overwritten by BLAS.
391         vectorYD.copyFrom(mBLASData.L2_dGEMV_y_m1);
392         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
393         mBLAS.DGEMV(trans, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX);
394         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
395         vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_T);
396         verifyMatrix(vectorYRef, vectorXD);
397 
398         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
399         vectorXD.copyFrom(mBLASData.L2_dGEMV_x_n1);
400         mBLAS.DGEMV(trans, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX);
401         vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_H);
402         verifyMatrix(vectorYRef, vectorXD);
403 
404         // Test for incX = 2 & incY = 3;
405         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
406         incX = 2;
407         incY = 3;
408         int dimX = 1 + (mBLASData.dN - 1) * incX;
409         int dimY = 1 + (mBLASData.dM - 1) * incY;
410         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
411         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
412         vectorXD.copyFrom(mBLASData.L2_dGEMV_x_n2);
413         vectorYD.copyFrom(mBLASData.L2_dGEMV_y_m2);
414 
415         mBLAS.DGEMV(trans, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
416         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
417         vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_N2);
418         verifyMatrix(vectorYRef, vectorYD);
419 
420         mRS.finish();
421         checkError();
422     }
423 
test_L2_CGEMV_Correctness()424     public void test_L2_CGEMV_Correctness() {
425         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
426         int incX = 1;
427         int incY = 1;
428 
429         // Populate input allocations
430         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
431         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
432         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1));
433         matrixAC.copyFrom(mBLASData.L2_cGEMV_A_mn);
434         vectorXC.copyFrom(mBLASData.L2_cGEMV_x_n1);
435         vectorYC.copyFrom(mBLASData.L2_cGEMV_y_m1);
436 
437         // Test for the default case: NO_TRANS
438         mBLAS.CGEMV(trans, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
439         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1));
440         vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_N);
441         verifyMatrix(vectorYRef, vectorYC);
442 
443         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
444         trans = ScriptIntrinsicBLAS.TRANSPOSE;
445         // Reload vector Y, since it was overwritten by BLAS.
446         vectorYC.copyFrom(mBLASData.L2_cGEMV_y_m1);
447         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
448         mBLAS.CGEMV(trans, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX);
449         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
450         vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_T);
451         verifyMatrix(vectorYRef, vectorXC);
452 
453         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
454         vectorXC.copyFrom(mBLASData.L2_cGEMV_x_n1);
455         mBLAS.CGEMV(trans, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX);
456         vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_H);
457         verifyMatrix(vectorYRef, vectorXC);
458 
459         // Test for incX = 2 & incY = 3;
460         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
461         incX = 2;
462         incY = 3;
463         int dimX = 1 + (mBLASData.dN - 1) * incX;
464         int dimY = 1 + (mBLASData.dM - 1) * incY;
465         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
466         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
467         vectorXC.copyFrom(mBLASData.L2_cGEMV_x_n2);
468         vectorYC.copyFrom(mBLASData.L2_cGEMV_y_m2);
469 
470         mBLAS.CGEMV(trans, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
471         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
472         vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_N2);
473         verifyMatrix(vectorYRef, vectorYC);
474 
475         mRS.finish();
476         checkError();
477     }
478 
test_L2_ZGEMV_Correctness()479     public void test_L2_ZGEMV_Correctness() {
480         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
481         int incX = 1;
482         int incY = 1;
483 
484         // Populate input allocations
485         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
486         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
487         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1));
488         matrixAZ.copyFrom(mBLASData.L2_zGEMV_A_mn);
489         vectorXZ.copyFrom(mBLASData.L2_zGEMV_x_n1);
490         vectorYZ.copyFrom(mBLASData.L2_zGEMV_y_m1);
491 
492         // Test for the default case: NO_TRANS
493         mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
494         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1));
495         vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_N);
496         verifyMatrix(vectorYRef, vectorYZ);
497 
498         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
499         trans = ScriptIntrinsicBLAS.TRANSPOSE;
500         // Reload vector Y, since it was overwritten by BLAS.
501         vectorYZ.copyFrom(mBLASData.L2_zGEMV_y_m1);
502         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
503         mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorYZ, incY, betaZ, vectorXZ, incX);
504         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
505         vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_T);
506         verifyMatrix(vectorYRef, vectorXZ);
507 
508         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
509         vectorXZ.copyFrom(mBLASData.L2_zGEMV_x_n1);
510         mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorYZ, incY, betaZ, vectorXZ, incX);
511         vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_H);
512         verifyMatrix(vectorYRef, vectorXZ);
513 
514         // Test for incX = 2 & incY = 3;
515         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
516         incX = 2;
517         incY = 3;
518         int dimX = 1 + (mBLASData.dN - 1) * incX;
519         int dimY = 1 + (mBLASData.dM - 1) * incY;
520         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
521         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
522         vectorXZ.copyFrom(mBLASData.L2_zGEMV_x_n2);
523         vectorYZ.copyFrom(mBLASData.L2_zGEMV_y_m2);
524 
525         mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
526         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
527         vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_N2);
528         verifyMatrix(vectorYRef, vectorYZ);
529 
530         mRS.finish();
531         checkError();
532     }
533 
534 
535 
xGBMV_API_test(int trans, int KL, int KU, int incX, int incY, ArrayList<Allocation> mMatrix)536     private void xGBMV_API_test(int trans, int KL, int KU, int incX, int incY, ArrayList<Allocation> mMatrix) {
537         for (Allocation matA : mMatrix) {
538             for (Allocation vecX : mMatrix) {
539                 if (!validateVecInput(vecX)) {
540                     continue;
541                 }
542                 for (Allocation vecY : mMatrix) {
543                     if (!validateVecInput(vecY)) {
544                         continue;
545                     }
546                     Element elemA = matA.getType().getElement();
547                     if (validateGEMV(elemA, trans, matA, vecX, incX, vecY, incY) && KU >= 0 && KL >= 0) {
548                         try {
549                             if (elemA.isCompatible(Element.F32(mRS))) {
550                                 mBLAS.SGBMV(trans, KL, KU, alphaS, matA, vecX, incX, betaS, vecY, incY);
551                             } else if (elemA.isCompatible(Element.F64(mRS))) {
552                                 mBLAS.DGBMV(trans, KL, KU, alphaD, matA, vecX, incX, betaD, vecY, incY);
553                             } else if (elemA.isCompatible(Element.F32_2(mRS))) {
554                                 mBLAS.CGBMV(trans, KL, KU, alphaC, matA, vecX, incX, betaC, vecY, incY);
555                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
556                                 mBLAS.ZGBMV(trans, KL, KU, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
557                             }
558                         } catch (RSRuntimeException e) {
559                             fail("should NOT throw RSRuntimeException");
560                         }
561                     } else {
562                         try {
563                             mBLAS.SGBMV(trans, KL, KU, alphaS, matA, vecX, incX, betaS, vecY, incY);
564                             fail("should throw RSRuntimeException for SGBMV");
565                         } catch (RSRuntimeException e) {
566                         }
567                         try {
568                             mBLAS.DGBMV(trans, KL, KU, alphaD, matA, vecX, incX, betaD, vecY, incY);
569                             fail("should throw RSRuntimeException for DGBMV");
570                         } catch (RSRuntimeException e) {
571                         }
572                         try {
573                             mBLAS.CGBMV(trans, KL, KU, alphaC, matA, vecX, incX, betaC, vecY, incY);
574                             fail("should throw RSRuntimeException for CGBMV");
575                         } catch (RSRuntimeException e) {
576                         }
577                         try {
578                             mBLAS.ZGBMV(trans, KL, KU, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
579                             fail("should throw RSRuntimeException for ZGBMV");
580                         } catch (RSRuntimeException e) {
581                         }
582                     }
583                 }
584             }
585         }
586     }
587 
L2_xGBMV_API(ArrayList<Allocation> mMatrix)588     public void L2_xGBMV_API(ArrayList<Allocation> mMatrix) {
589         for (int trans : mTranspose) {
590             for (int incX : mInc) {
591                 for (int K : mK) {
592                     xGBMV_API_test(trans, K, K, incX, incX, mMatrix);
593                 }
594             }
595         }
596     }
597 
test_L2_SGBMV_API()598     public void test_L2_SGBMV_API() {
599         L2_xGBMV_API(mMatrixS);
600     }
601 
test_L2_DGBMV_API()602     public void test_L2_DGBMV_API() {
603         L2_xGBMV_API(mMatrixD);
604     }
605 
test_L2_CGBMV_API()606     public void test_L2_CGBMV_API() {
607         L2_xGBMV_API(mMatrixC);
608     }
609 
test_L2_ZGBMV_API()610     public void test_L2_ZGBMV_API() {
611         L2_xGBMV_API(mMatrixZ);
612     }
613 
test_L2_SGBMV_Correctness()614     public void test_L2_SGBMV_Correctness() {
615         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
616         int incX = 1;
617         int incY = 1;
618 
619         // Populate input allocations
620         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
621         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
622         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1));
623         matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_sGBMV_A_mn);
624         vectorXS.copyFrom(mBLASData.L2_sGBMV_x_n1);
625         vectorYS.copyFrom(mBLASData.L2_sGBMV_y_m1);
626 
627         // Test for the default case: NO_TRANS
628         mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
629         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1));
630         vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_N);
631         verifyMatrix(vectorYRef, vectorYS);
632 
633         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
634         trans = ScriptIntrinsicBLAS.TRANSPOSE;
635         // Reload vector Y, since it was overwritten by BLAS.
636         vectorYS.copyFrom(mBLASData.L2_sGBMV_y_m1);
637         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
638         mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX);
639         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
640         vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_T);
641         verifyMatrix(vectorYRef, vectorXS);
642 
643         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
644         vectorXS.copyFrom(mBLASData.L2_sGBMV_x_n1);
645         mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX);
646         vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_H);
647         verifyMatrix(vectorYRef, vectorXS);
648 
649         // Test for incX = 2 & incY = 3;
650         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
651         incX = 2;
652         incY = 3;
653         int dimX = 1 + (mBLASData.dN - 1) * incX;
654         int dimY = 1 + (mBLASData.dM - 1) * incY;
655         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
656         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
657         vectorXS.copyFrom(mBLASData.L2_sGBMV_x_n2);
658         vectorYS.copyFrom(mBLASData.L2_sGBMV_y_m2);
659 
660         mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
661         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
662         vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_N2);
663         verifyMatrix(vectorYRef, vectorYS);
664 
665         mRS.finish();
666         checkError();
667     }
668 
test_L2_DGBMV_Correctness()669     public void test_L2_DGBMV_Correctness() {
670         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
671         int incX = 1;
672         int incY = 1;
673 
674         // Populate input allocations
675         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
676         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
677         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1));
678         matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_dGBMV_A_mn);
679         vectorXD.copyFrom(mBLASData.L2_dGBMV_x_n1);
680         vectorYD.copyFrom(mBLASData.L2_dGBMV_y_m1);
681 
682         // Test for the default case: NO_TRANS
683         mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
684         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1));
685         vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_N);
686         verifyMatrix(vectorYRef, vectorYD);
687 
688         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
689         trans = ScriptIntrinsicBLAS.TRANSPOSE;
690         // Reload vector Y, since it was overwritten by BLAS.
691         vectorYD.copyFrom(mBLASData.L2_dGBMV_y_m1);
692         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
693         mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX);
694         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
695         vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_T);
696         verifyMatrix(vectorYRef, vectorXD);
697 
698         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
699         vectorXD.copyFrom(mBLASData.L2_dGBMV_x_n1);
700         mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX);
701         vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_H);
702         verifyMatrix(vectorYRef, vectorXD);
703 
704         // Test for incX = 2 & incY = 3;
705         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
706         incX = 2;
707         incY = 3;
708         int dimX = 1 + (mBLASData.dN - 1) * incX;
709         int dimY = 1 + (mBLASData.dM - 1) * incY;
710         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
711         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
712         vectorXD.copyFrom(mBLASData.L2_dGBMV_x_n2);
713         vectorYD.copyFrom(mBLASData.L2_dGBMV_y_m2);
714 
715         mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
716         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
717         vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_N2);
718         verifyMatrix(vectorYRef, vectorYD);
719 
720         mRS.finish();
721         checkError();
722     }
723 
test_L2_CGBMV_Correctness()724     public void test_L2_CGBMV_Correctness() {
725         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
726         int incX = 1;
727         int incY = 1;
728 
729         // Populate input allocations
730         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
731         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
732         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1));
733         matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_cGBMV_A_mn);
734         vectorXC.copyFrom(mBLASData.L2_cGBMV_x_n1);
735         vectorYC.copyFrom(mBLASData.L2_cGBMV_y_m1);
736 
737         // Test for the default case: NO_TRANS
738         mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
739         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1));
740         vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_N);
741         verifyMatrix(vectorYRef, vectorYC);
742 
743         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
744         trans = ScriptIntrinsicBLAS.TRANSPOSE;
745         // Reload vector Y, since it was overwritten by BLAS.
746         vectorYC.copyFrom(mBLASData.L2_cGBMV_y_m1);
747         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
748         mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX);
749         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
750         vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_T);
751         verifyMatrix(vectorYRef, vectorXC);
752 
753         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
754         vectorXC.copyFrom(mBLASData.L2_cGBMV_x_n1);
755         mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX);
756         vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_H);
757         verifyMatrix(vectorYRef, vectorXC);
758 
759         // Test for incX = 2 & incY = 3;
760         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
761         incX = 2;
762         incY = 3;
763         int dimX = 1 + (mBLASData.dN - 1) * incX;
764         int dimY = 1 + (mBLASData.dM - 1) * incY;
765         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
766         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
767         vectorXC.copyFrom(mBLASData.L2_cGBMV_x_n2);
768         vectorYC.copyFrom(mBLASData.L2_cGBMV_y_m2);
769 
770         mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
771         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
772         vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_N2);
773         verifyMatrix(vectorYRef, vectorYC);
774 
775         mRS.finish();
776         checkError();
777     }
778 
test_L2_ZGBMV_Correctness()779     public void test_L2_ZGBMV_Correctness() {
780         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
781         int incX = 1;
782         int incY = 1;
783 
784         // Populate input allocations
785         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
786         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
787         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1));
788         matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_zGBMV_A_mn);
789         vectorXZ.copyFrom(mBLASData.L2_zGBMV_x_n1);
790         vectorYZ.copyFrom(mBLASData.L2_zGBMV_y_m1);
791 
792         // Test for the default case: NO_TRANS
793         mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
794         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1));
795         vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_N);
796         verifyMatrix(vectorYRef, vectorYZ);
797 
798         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
799         trans = ScriptIntrinsicBLAS.TRANSPOSE;
800         // Reload vector Y, since it was overwritten by BLAS.
801         vectorYZ.copyFrom(mBLASData.L2_zGBMV_y_m1);
802         // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T
803         mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorYZ, incY, betaZ, vectorXZ, incX);
804         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
805         vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_T);
806         verifyMatrix(vectorYRef, vectorXZ);
807 
808         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
809         vectorXZ.copyFrom(mBLASData.L2_zGBMV_x_n1);
810         mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorYZ, incX, betaZ, vectorXZ, incY);
811         vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_H);
812         verifyMatrix(vectorYRef, vectorXZ);
813 
814         // Test for incX = 2 & incY = 3;
815         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
816         incX = 2;
817         incY = 3;
818         int dimX = 1 + (mBLASData.dN - 1) * incX;
819         int dimY = 1 + (mBLASData.dM - 1) * incY;
820         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
821         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
822         vectorXZ.copyFrom(mBLASData.L2_zGBMV_x_n2);
823         vectorYZ.copyFrom(mBLASData.L2_zGBMV_y_m2);
824 
825         mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
826         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
827         vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_N2);
828         verifyMatrix(vectorYRef, vectorYZ);
829 
830         mRS.finish();
831         checkError();
832     }
833 
834 
xHEMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)835     private void xHEMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
836         for (Allocation matA : mMatrix) {
837             for (Allocation vecX : mMatrix) {
838                 if (!validateVecInput(vecX)) {
839                     continue;
840                 }
841                 for (Allocation vecY : mMatrix) {
842                     if (!validateVecInput(vecY)) {
843                         continue;
844                     }
845                     Element elemA = matA.getType().getElement();
846                     if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
847                         try {
848                             if (elemA.isCompatible(Element.F32_2(mRS))) {
849                                 mBLAS.CHEMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY);
850                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
851                                 mBLAS.ZHEMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
852                             }
853                         } catch (RSRuntimeException e) {
854                             fail("should NOT throw RSRuntimeException");
855                         }
856                     } else {
857                         try {
858                             mBLAS.CHEMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY);
859                             fail("should throw RSRuntimeException for CHEMV");
860                         } catch (RSRuntimeException e) {
861                         }
862                         try {
863                             mBLAS.ZHEMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
864                             fail("should throw RSRuntimeException for ZHEMV");
865                         } catch (RSRuntimeException e) {
866                         }
867                     }
868                 }
869             }
870         }
871     }
872 
L2_xHEMV_API(ArrayList<Allocation> mMatrix)873     public void L2_xHEMV_API(ArrayList<Allocation> mMatrix) {
874         for (int Uplo : mUplo) {
875             for (int incX : mInc) {
876                 xHEMV_API_test(Uplo, incX, incX, mMatrix);
877             }
878         }
879     }
880 
test_L2_CHEMV_API()881     public void test_L2_CHEMV_API() {
882         L2_xHEMV_API(mMatrixC);
883     }
884 
test_L2_ZHEMV_API()885     public void test_L2_ZHEMV_API() {
886         L2_xHEMV_API(mMatrixZ);
887     }
888 
test_L2_CHEMV_Correctness()889     public void test_L2_CHEMV_Correctness() {
890         int uplo = ScriptIntrinsicBLAS.UPPER;
891         int incX = 1;
892         int incY = 1;
893 
894         // Populate input allocations
895         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
896         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
897         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
898         matrixAC.copyFrom(mBLASData.L2_cHEMV_A_nn);
899         vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n1);
900         vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n1);
901 
902         // Test for the default case:
903         mBLAS.CHEMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
904         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
905         vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N);
906         verifyMatrix(vectorYRef, vectorYC);
907 
908         // Test for incX = 2 & incY = 3;
909         incX = 2;
910         incY = 3;
911         int dimX = 1 + (mBLASData.dN - 1) * incX;
912         int dimY = 1 + (mBLASData.dN - 1) * incY;
913         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
914         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
915         vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n2);
916         vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n2);
917 
918         mBLAS.CHEMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
919         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
920         vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N2);
921         verifyMatrix(vectorYRef, vectorYC);
922 
923         mRS.finish();
924         checkError();
925     }
926 
test_L2_ZHEMV_Correctness()927     public void test_L2_ZHEMV_Correctness() {
928         int uplo = ScriptIntrinsicBLAS.UPPER;
929         int incX = 1;
930         int incY = 1;
931 
932         // Populate input allocations
933         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
934         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
935         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
936         matrixAZ.copyFrom(mBLASData.L2_zHEMV_A_nn);
937         vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n1);
938         vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n1);
939 
940         // Test for the default case: NO_TRANS
941         mBLAS.ZHEMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
942         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
943         vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N);
944         verifyMatrix(vectorYRef, vectorYZ);
945 
946         // Test for incX = 2 & incY = 3;
947         incX = 2;
948         incY = 3;
949         int dimX = 1 + (mBLASData.dN - 1) * incX;
950         int dimY = 1 + (mBLASData.dN - 1) * incY;
951         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
952         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
953         vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n2);
954         vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n2);
955 
956         mBLAS.ZHEMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
957         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
958         vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N2);
959         verifyMatrix(vectorYRef, vectorYZ);
960 
961         mRS.finish();
962         checkError();
963     }
964 
965 
966 
xHBMV_API_test(int Uplo, int K, int incX, int incY, ArrayList<Allocation> mMatrix)967     private void xHBMV_API_test(int Uplo, int K, int incX, int incY, ArrayList<Allocation> mMatrix) {
968         for (Allocation matA : mMatrix) {
969             for (Allocation vecX : mMatrix) {
970                 if (!validateVecInput(vecX)) {
971                     continue;
972                 }
973                 for (Allocation vecY : mMatrix) {
974                     if (!validateVecInput(vecY)) {
975                         continue;
976                     }
977                     Element elemA = matA.getType().getElement();
978                     if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA) && K >= 0) {
979                         try {
980                             if (elemA.isCompatible(Element.F32_2(mRS))) {
981                                 mBLAS.CHBMV(Uplo, K, alphaC, matA, vecX, incX, betaC, vecY, incY);
982                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
983                                 mBLAS.ZHBMV(Uplo, K, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
984                             }
985                         } catch (RSRuntimeException e) {
986                             fail("should NOT throw RSRuntimeException");
987                         }
988                     } else {
989                         try {
990                             mBLAS.CHBMV(Uplo, K, alphaC, matA, vecX, incX, betaC, vecY, incY);
991                             fail("should throw RSRuntimeException for CHBMV");
992                         } catch (RSRuntimeException e) {
993                         }
994                         try {
995                             mBLAS.ZHBMV(Uplo, K, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
996                             fail("should throw RSRuntimeException for ZHBMV");
997                         } catch (RSRuntimeException e) {
998                         }
999                     }
1000                 }
1001             }
1002         }
1003     }
1004 
L2_xHBMV_API(ArrayList<Allocation> mMatrix)1005     public void L2_xHBMV_API(ArrayList<Allocation> mMatrix) {
1006         for (int Uplo : mUplo) {
1007             for (int K : mK) {
1008                 for (int incX : mInc) {
1009                         xHBMV_API_test(Uplo, K, incX, incX, mMatrix);
1010                 }
1011             }
1012         }
1013     }
1014 
test_L2_CHBMV_API()1015     public void test_L2_CHBMV_API() {
1016         L2_xHBMV_API(mMatrixC);
1017     }
1018 
test_L2_ZHBMV_API()1019     public void test_L2_ZHBMV_API() {
1020         L2_xHBMV_API(mMatrixZ);
1021     }
1022 
test_L2_CHBMV_Correctness()1023     public void test_L2_CHBMV_Correctness() {
1024         int uplo = ScriptIntrinsicBLAS.UPPER;
1025         int incX = 1;
1026         int incY = 1;
1027 
1028         // Populate input allocations
1029         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
1030         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
1031         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
1032         matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_cHBMV_A_nn);
1033         vectorXC.copyFrom(mBLASData.L2_cHBMV_x_n1);
1034         vectorYC.copyFrom(mBLASData.L2_cHBMV_y_n1);
1035 
1036         // Test for the default case:
1037         mBLAS.CHBMV(uplo, mBLASData.KL, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
1038         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
1039         vectorYRef.copyFrom(mBLASData.L2_cHBMV_o_N);
1040         verifyMatrix(vectorYRef, vectorYC);
1041 
1042         // Test for incX = 2 & incY = 3;
1043         incX = 2;
1044         incY = 3;
1045         int dimX = 1 + (mBLASData.dN - 1) * incX;
1046         int dimY = 1 + (mBLASData.dN - 1) * incY;
1047         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
1048         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
1049         vectorXC.copyFrom(mBLASData.L2_cHBMV_x_n2);
1050         vectorYC.copyFrom(mBLASData.L2_cHBMV_y_n2);
1051 
1052         mBLAS.CHBMV(uplo, mBLASData.KL, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
1053         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
1054         vectorYRef.copyFrom(mBLASData.L2_cHBMV_o_N2);
1055         verifyMatrix(vectorYRef, vectorYC);
1056 
1057         mRS.finish();
1058         checkError();
1059     }
1060 
test_L2_ZHBMV_Correctness()1061     public void test_L2_ZHBMV_Correctness() {
1062         int uplo = ScriptIntrinsicBLAS.UPPER;
1063         int incX = 1;
1064         int incY = 1;
1065 
1066         // Populate input allocations
1067         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
1068         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
1069         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
1070         matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_zHBMV_A_nn);
1071         vectorXZ.copyFrom(mBLASData.L2_zHBMV_x_n1);
1072         vectorYZ.copyFrom(mBLASData.L2_zHBMV_y_n1);
1073 
1074         // Test for the default case: NO_TRANS
1075         mBLAS.ZHBMV(uplo, mBLASData.KL, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
1076         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
1077         vectorYRef.copyFrom(mBLASData.L2_zHBMV_o_N);
1078         verifyMatrix(vectorYRef, vectorYZ);
1079 
1080         // Test for incX = 2 & incY = 3;
1081         incX = 2;
1082         incY = 3;
1083         int dimX = 1 + (mBLASData.dN - 1) * incX;
1084         int dimY = 1 + (mBLASData.dN - 1) * incY;
1085         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
1086         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
1087         vectorXZ.copyFrom(mBLASData.L2_zHBMV_x_n2);
1088         vectorYZ.copyFrom(mBLASData.L2_zHBMV_y_n2);
1089 
1090         mBLAS.ZHBMV(uplo, mBLASData.KL, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
1091         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
1092         vectorYRef.copyFrom(mBLASData.L2_zHBMV_o_N2);
1093         verifyMatrix(vectorYRef, vectorYZ);
1094 
1095         mRS.finish();
1096         checkError();
1097     }
1098 
1099 
xHPMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)1100     private void xHPMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
1101         for (Allocation matA : mMatrix) {
1102             for (Allocation vecX : mMatrix) {
1103                 if (!validateVecInput(vecX)) {
1104                     continue;
1105                 }
1106                 for (Allocation vecY : mMatrix) {
1107                     if (!validateVecInput(vecY)) {
1108                         continue;
1109                     }
1110                     Element elemA = matA.getType().getElement();
1111                     if (validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
1112                         try {
1113                             if (elemA.isCompatible(Element.F32_2(mRS))) {
1114                                 mBLAS.CHPMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY);
1115                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
1116                                 mBLAS.ZHPMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
1117                             }
1118                         } catch (RSRuntimeException e) {
1119                             fail("should NOT throw RSRuntimeException");
1120                         }
1121                     } else {
1122                         try {
1123                             mBLAS.CHPMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY);
1124                             fail("should throw RSRuntimeException for CHPMV");
1125                         } catch (RSRuntimeException e) {
1126                         }
1127                         try {
1128                             mBLAS.ZHPMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
1129                             fail("should throw RSRuntimeException for ZHPMV");
1130                         } catch (RSRuntimeException e) {
1131                         }
1132                     }
1133                 }
1134             }
1135         }
1136     }
1137 
L2_xHPMV_API(ArrayList<Allocation> mMatrix)1138     public void L2_xHPMV_API(ArrayList<Allocation> mMatrix) {
1139         for (int Uplo : mUplo) {
1140             for (int incX : mInc) {
1141                 xHPMV_API_test(Uplo, incX, incX, mMatrix);
1142             }
1143         }
1144     }
1145 
test_L2_CHPMV_API()1146     public void test_L2_CHPMV_API() {
1147         L2_xHPMV_API(mMatrixC);
1148     }
1149 
test_L2_ZHPMV_API()1150     public void test_L2_ZHPMV_API() {
1151         L2_xHPMV_API(mMatrixZ);
1152     }
1153 
test_L2_CHPMV_Correctness()1154     public void test_L2_CHPMV_Correctness() {
1155         int uplo = ScriptIntrinsicBLAS.UPPER;
1156         int incX = 1;
1157         int incY = 1;
1158 
1159         // Populate input allocations
1160         int N = mBLASData.dN;
1161         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
1162         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
1163         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
1164         matrixAC.copyFrom(mBLASData.L2_cHEMV_A_nn_pu);
1165         vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n1);
1166         vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n1);
1167 
1168         // Test for the default case:
1169         mBLAS.CHPMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
1170         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
1171         vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N);
1172         verifyMatrix(vectorYRef, vectorYC);
1173 
1174         // Test for incX = 2 & incY = 3;
1175         incX = 2;
1176         incY = 3;
1177         int dimX = 1 + (N - 1) * incX;
1178         int dimY = 1 + (N - 1) * incY;
1179         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
1180         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
1181         vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n2);
1182         vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n2);
1183 
1184         mBLAS.CHPMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
1185         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
1186         vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N2);
1187         verifyMatrix(vectorYRef, vectorYC);
1188 
1189         mRS.finish();
1190         checkError();
1191     }
1192 
test_L2_ZHPMV_Correctness()1193     public void test_L2_ZHPMV_Correctness() {
1194         int uplo = ScriptIntrinsicBLAS.UPPER;
1195         int incX = 1;
1196         int incY = 1;
1197 
1198         // Populate input allocations
1199         int N = mBLASData.dN;
1200         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
1201         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
1202         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
1203         matrixAZ.copyFrom(mBLASData.L2_zHEMV_A_nn_pu);
1204         vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n1);
1205         vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n1);
1206 
1207         // Test for the default case: NO_TRANS
1208         mBLAS.ZHPMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
1209         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
1210         vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N);
1211         verifyMatrix(vectorYRef, vectorYZ);
1212 
1213         // Test for incX = 2 & incY = 3;
1214         incX = 2;
1215         incY = 3;
1216         int dimX = 1 + (N - 1) * incX;
1217         int dimY = 1 + (N - 1) * incY;
1218         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
1219         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
1220         vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n2);
1221         vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n2);
1222 
1223         mBLAS.ZHPMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
1224         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
1225         vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N2);
1226         verifyMatrix(vectorYRef, vectorYZ);
1227 
1228         mRS.finish();
1229         checkError();
1230     }
1231 
1232 
validateSYMV(Element e, int Uplo, Allocation A, Allocation X, int incX, Allocation Y, int incY)1233     private boolean validateSYMV(Element e, int Uplo, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
1234         if (!validateUplo(Uplo)) {
1235             return false;
1236         }
1237         int N = A.getType().getY();
1238         if (A.getType().getX() != N) {
1239             return false;
1240         }
1241         if (!A.getType().getElement().isCompatible(e) ||
1242             !X.getType().getElement().isCompatible(e) ||
1243             !Y.getType().getElement().isCompatible(e) ) {
1244             return false;
1245         }
1246         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1247             return false;
1248         }
1249 
1250         if (incX <= 0 || incY <= 0) {
1251             return false;
1252         }
1253         int expectedXDim = 1 + (N - 1) * incX;
1254         if (X.getType().getX() != expectedXDim) {
1255             return false;
1256         }
1257         int expectedYDim = 1 + (N - 1) * incY;
1258         if (Y.getType().getX() != expectedYDim) {
1259             return false;
1260         }
1261         return true;
1262     }
1263 
xSYMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)1264     private void xSYMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
1265         for (Allocation matA : mMatrix) {
1266             for (Allocation vecX : mMatrix) {
1267                 if (!validateVecInput(vecX)) {
1268                     continue;
1269                 }
1270                 for (Allocation vecY : mMatrix) {
1271                     if (!validateVecInput(vecY)) {
1272                         continue;
1273                     }
1274                     Element elemA = matA.getType().getElement();
1275                     if (validateSYMV(elemA, Uplo, matA, vecX, incX, vecY, incY)) {
1276                         try {
1277                             if (elemA.isCompatible(Element.F32(mRS))) {
1278                                 mBLAS.SSYMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY);
1279                             } else if (elemA.isCompatible(Element.F64(mRS))) {
1280                                 mBLAS.DSYMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY);
1281                             }
1282                         } catch (RSRuntimeException e) {
1283                             fail("should NOT throw RSRuntimeException");
1284                         }
1285                     } else {
1286                         try {
1287                             mBLAS.SSYMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY);
1288                             fail("should throw RSRuntimeException for SSYMV");
1289                         } catch (RSRuntimeException e) {
1290                         }
1291                         try {
1292                             mBLAS.DSYMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY);
1293                             fail("should throw RSRuntimeException for DSYMV");
1294                         } catch (RSRuntimeException e) {
1295                         }
1296                     }
1297                 }
1298             }
1299         }
1300     }
1301 
L2_xSYMV_API(ArrayList<Allocation> mMatrix)1302     public void L2_xSYMV_API(ArrayList<Allocation> mMatrix) {
1303         for (int Uplo : mUplo) {
1304             for (int incX : mInc) {
1305                 xSYMV_API_test(Uplo, incX, incX, mMatrix);
1306             }
1307         }
1308     }
1309 
test_L2_SSYMV_API()1310     public void test_L2_SSYMV_API() {
1311         L2_xSYMV_API(mMatrixS);
1312     }
1313 
test_L2_DSYMV_API()1314     public void test_L2_DSYMV_API() {
1315         L2_xSYMV_API(mMatrixD);
1316     }
1317 
test_L2_SSYMV_Correctness()1318     public void test_L2_SSYMV_Correctness() {
1319         int uplo = ScriptIntrinsicBLAS.UPPER;
1320         int incX = 1;
1321         int incY = 1;
1322 
1323         // Populate input allocations
1324         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
1325         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
1326         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
1327         matrixAS.copyFrom(mBLASData.L2_sSYMV_A_nn);
1328         vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n1);
1329         vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n1);
1330 
1331         // Test for the default case:
1332         mBLAS.SSYMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
1333         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
1334         vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N);
1335         verifyMatrix(vectorYRef, vectorYS);
1336 
1337         // Test for incX = 2 & incY = 3;
1338         incX = 2;
1339         incY = 3;
1340         int dimX = 1 + (mBLASData.dN - 1) * incX;
1341         int dimY = 1 + (mBLASData.dN - 1) * incY;
1342         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
1343         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
1344         vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n2);
1345         vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n2);
1346 
1347         mBLAS.SSYMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
1348         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
1349         vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N2);
1350         verifyMatrix(vectorYRef, vectorYS);
1351 
1352         mRS.finish();
1353         checkError();
1354     }
1355 
test_L2_DSYMV_Correctness()1356     public void test_L2_DSYMV_Correctness() {
1357         int uplo = ScriptIntrinsicBLAS.UPPER;
1358         int incX = 1;
1359         int incY = 1;
1360 
1361         // Populate input allocations
1362         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
1363         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
1364         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
1365         matrixAD.copyFrom(mBLASData.L2_dSYMV_A_nn);
1366         vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n1);
1367         vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n1);
1368 
1369         // Test for the default case:
1370         mBLAS.DSYMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
1371         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
1372         vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N);
1373         verifyMatrix(vectorYRef, vectorYD);
1374 
1375         // Test for incX = 2 & incY = 3;
1376         incX = 2;
1377         incY = 3;
1378         int dimX = 1 + (mBLASData.dN - 1) * incX;
1379         int dimY = 1 + (mBLASData.dN - 1) * incY;
1380         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
1381         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
1382         vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n2);
1383         vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n2);
1384 
1385         mBLAS.DSYMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
1386         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
1387         vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N2);
1388         verifyMatrix(vectorYRef, vectorYD);
1389 
1390         mRS.finish();
1391         checkError();
1392     }
1393 
1394 
1395 
xSBMV_API_test(int Uplo, int K, int incX, int incY, ArrayList<Allocation> mMatrix)1396     private void xSBMV_API_test(int Uplo, int K, int incX, int incY, ArrayList<Allocation> mMatrix) {
1397         for (Allocation matA : mMatrix) {
1398             for (Allocation vecX : mMatrix) {
1399                 if (!validateVecInput(vecX)) {
1400                     continue;
1401                 }
1402                 for (Allocation vecY : mMatrix) {
1403                     if (!validateVecInput(vecY)) {
1404                         continue;
1405                     }
1406                     Element elemA = matA.getType().getElement();
1407                     if (validateSYMV(elemA, Uplo, matA, vecX, incX, vecY, incY) && K >= 0) {
1408                         try {
1409                             if (elemA.isCompatible(Element.F32(mRS))) {
1410                                 mBLAS.SSBMV(Uplo, K, alphaS, matA, vecX, incX, betaS, vecY, incY);
1411                             } else if (elemA.isCompatible(Element.F64(mRS))) {
1412                                 mBLAS.DSBMV(Uplo, K, alphaD, matA, vecX, incX, betaD, vecY, incY);
1413                             }
1414                         } catch (RSRuntimeException e) {
1415                             fail("should NOT throw RSRuntimeException");
1416                         }
1417                     } else {
1418                         try {
1419                             mBLAS.SSBMV(Uplo, K, alphaS, matA, vecX, incX, betaS, vecY, incY);
1420                             fail("should throw RSRuntimeException for SSBMV");
1421                         } catch (RSRuntimeException e) {
1422                         }
1423                         try {
1424                             mBLAS.DSBMV(Uplo, K, alphaD, matA, vecX, incX, betaD, vecY, incY);
1425                             fail("should throw RSRuntimeException for DSBMV");
1426                         } catch (RSRuntimeException e) {
1427                         }
1428                     }
1429                 }
1430             }
1431         }
1432     }
1433 
L2_xSBMV_API(ArrayList<Allocation> mMatrix)1434     public void L2_xSBMV_API(ArrayList<Allocation> mMatrix) {
1435         for (int Uplo : mUplo) {
1436             for (int K : mK) {
1437                 for (int incX : mInc) {
1438                     xSBMV_API_test(Uplo, K, incX, incX, mMatrix);
1439                 }
1440             }
1441         }
1442     }
1443 
test_L2_SSBMV_API()1444     public void test_L2_SSBMV_API() {
1445         L2_xSBMV_API(mMatrixS);
1446     }
1447 
test_L2_DSBMV_API()1448     public void test_L2_DSBMV_API() {
1449         L2_xSBMV_API(mMatrixD);
1450     }
1451 
test_L2_SSBMV_Correctness()1452     public void test_L2_SSBMV_Correctness() {
1453         int uplo = ScriptIntrinsicBLAS.UPPER;
1454         int incX = 1;
1455         int incY = 1;
1456 
1457         // Populate input allocations
1458         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
1459         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
1460         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
1461         matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_sSBMV_A_nn);
1462         vectorXS.copyFrom(mBLASData.L2_sSBMV_x_n1);
1463         vectorYS.copyFrom(mBLASData.L2_sSBMV_y_n1);
1464 
1465         // Test for the default case:
1466         mBLAS.SSBMV(uplo, mBLASData.KL, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
1467         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
1468         vectorYRef.copyFrom(mBLASData.L2_sSBMV_o_N);
1469         verifyMatrix(vectorYRef, vectorYS);
1470 
1471         // Test for incX = 2 & incY = 3;
1472         incX = 2;
1473         incY = 3;
1474         int dimX = 1 + (mBLASData.dN - 1) * incX;
1475         int dimY = 1 + (mBLASData.dN - 1) * incY;
1476         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
1477         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
1478         vectorXS.copyFrom(mBLASData.L2_sSBMV_x_n2);
1479         vectorYS.copyFrom(mBLASData.L2_sSBMV_y_n2);
1480 
1481         mBLAS.SSBMV(uplo, mBLASData.KL, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
1482         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
1483         vectorYRef.copyFrom(mBLASData.L2_sSBMV_o_N2);
1484         verifyMatrix(vectorYRef, vectorYS);
1485 
1486         mRS.finish();
1487         checkError();
1488     }
1489 
test_L2_DSBMV_Correctness()1490     public void test_L2_DSBMV_Correctness() {
1491         int uplo = ScriptIntrinsicBLAS.UPPER;
1492         int incX = 1;
1493         int incY = 1;
1494 
1495         // Populate input allocations
1496         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
1497         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
1498         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
1499         matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_dSBMV_A_nn);
1500         vectorXD.copyFrom(mBLASData.L2_dSBMV_x_n1);
1501         vectorYD.copyFrom(mBLASData.L2_dSBMV_y_n1);
1502 
1503         // Test for the default case:
1504         mBLAS.DSBMV(uplo, mBLASData.KL, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
1505         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
1506         vectorYRef.copyFrom(mBLASData.L2_dSBMV_o_N);
1507         verifyMatrix(vectorYRef, vectorYD);
1508 
1509         // Test for incX = 2 & incY = 3;
1510         incX = 2;
1511         incY = 3;
1512         int dimX = 1 + (mBLASData.dN - 1) * incX;
1513         int dimY = 1 + (mBLASData.dN - 1) * incY;
1514         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
1515         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
1516         vectorXD.copyFrom(mBLASData.L2_dSBMV_x_n2);
1517         vectorYD.copyFrom(mBLASData.L2_dSBMV_y_n2);
1518 
1519         mBLAS.DSBMV(uplo, mBLASData.KL, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
1520         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
1521         vectorYRef.copyFrom(mBLASData.L2_dSBMV_o_N2);
1522         verifyMatrix(vectorYRef, vectorYD);
1523 
1524         mRS.finish();
1525         checkError();
1526     }
1527 
1528 
validateSPMV(Element e, int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY)1529     private boolean validateSPMV(Element e, int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
1530         if (!validateUplo(Uplo)) {
1531             return false;
1532         }
1533         if (!Ap.getType().getElement().isCompatible(e) ||
1534             !X.getType().getElement().isCompatible(e) ||
1535             !Y.getType().getElement().isCompatible(e)) {
1536             return false;
1537         }
1538         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1539             return false;
1540         }
1541 
1542         if (Ap.getType().getY() > 1) {
1543             return false;
1544         }
1545 
1546         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1547         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1548             return false;
1549         }
1550         if (incX <= 0 || incY <= 0) {
1551             return false;
1552         }
1553         int expectedXDim = 1 + (N - 1) * incX;
1554         if (X.getType().getX() != expectedXDim) {
1555             return false;
1556         }
1557         int expectedYDim = 1 + (N - 1) * incY;
1558         if (Y.getType().getX() != expectedYDim) {
1559             return false;
1560         }
1561 
1562         return true;
1563     }
1564 
xSPMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)1565     private void xSPMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
1566         for (Allocation matA : mMatrix) {
1567             for (Allocation vecX : mMatrix) {
1568                 if (!validateVecInput(vecX)) {
1569                     continue;
1570                 }
1571                 for (Allocation vecY : mMatrix) {
1572                     if (!validateVecInput(vecY)) {
1573                         continue;
1574                     }
1575                     Element elemA = matA.getType().getElement();
1576                     if (validateSPMV(elemA, Uplo, matA, vecX, incX, vecY, incY)) {
1577                         try {
1578                             if (elemA.isCompatible(Element.F32(mRS))) {
1579                                 mBLAS.SSPMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY);
1580                             } else if (elemA.isCompatible(Element.F64(mRS))) {
1581                                 mBLAS.DSPMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY);
1582                             }
1583                         } catch (RSRuntimeException e) {
1584                             fail("should NOT throw RSRuntimeException");
1585                         }
1586                     } else {
1587                         try {
1588                             mBLAS.SSPMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY);
1589                             fail("should throw RSRuntimeException for SSPMV");
1590                         } catch (RSRuntimeException e) {
1591                         }
1592                         try {
1593                             mBLAS.DSPMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY);
1594                             fail("should throw RSRuntimeException for DSPMV");
1595                         } catch (RSRuntimeException e) {
1596                         }
1597                     }
1598                 }
1599             }
1600         }
1601     }
1602 
L2_xSPMV_API(ArrayList<Allocation> mMatrix)1603     public void L2_xSPMV_API(ArrayList<Allocation> mMatrix) {
1604         for (int Uplo : mUplo) {
1605             for (int incX : mInc) {
1606                 xSPMV_API_test(Uplo, incX, incX, mMatrix);
1607             }
1608         }
1609     }
1610 
test_L2_SSPMV_API()1611     public void test_L2_SSPMV_API() {
1612         L2_xSPMV_API(mMatrixS);
1613     }
1614 
test_L2_DSPMV_API()1615     public void test_L2_DSPMV_API() {
1616         L2_xSPMV_API(mMatrixD);
1617     }
1618 
test_L2_SSPMV_Correctness()1619     public void test_L2_SSPMV_Correctness() {
1620         int uplo = ScriptIntrinsicBLAS.UPPER;
1621         int incX = 1;
1622         int incY = 1;
1623 
1624         // Populate input allocations
1625         int N = mBLASData.dN;
1626         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
1627         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
1628         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
1629         matrixAS.copyFrom(mBLASData.L2_sSYMV_A_nn_pu);
1630         vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n1);
1631         vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n1);
1632 
1633         // Test for the default case:
1634         mBLAS.SSPMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
1635         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
1636         vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N);
1637         verifyMatrix(vectorYRef, vectorYS);
1638 
1639         // Test for incX = 2 & incY = 3;
1640         incX = 2;
1641         incY = 3;
1642         int dimX = 1 + (N - 1) * incX;
1643         int dimY = 1 + (N - 1) * incY;
1644         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
1645         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
1646         vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n2);
1647         vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n2);
1648 
1649         mBLAS.SSPMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
1650         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
1651         vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N2);
1652         verifyMatrix(vectorYRef, vectorYS);
1653 
1654         mRS.finish();
1655         checkError();
1656     }
1657 
test_L2_DSPMV_Correctness()1658     public void test_L2_DSPMV_Correctness() {
1659         int uplo = ScriptIntrinsicBLAS.UPPER;
1660         int incX = 1;
1661         int incY = 1;
1662 
1663         // Populate input allocations
1664         int N = mBLASData.dN;
1665         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
1666         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
1667         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
1668         matrixAD.copyFrom(mBLASData.L2_dSYMV_A_nn_pu);
1669         vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n1);
1670         vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n1);
1671 
1672         // Test for the default case:
1673         mBLAS.DSPMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
1674         Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
1675         vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N);
1676         verifyMatrix(vectorYRef, vectorYD);
1677 
1678         // Test for incX = 2 & incY = 3;
1679         incX = 2;
1680         incY = 3;
1681         int dimX = 1 + (N - 1) * incX;
1682         int dimY = 1 + (N - 1) * incY;
1683         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
1684         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
1685         vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n2);
1686         vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n2);
1687 
1688         mBLAS.DSPMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
1689         vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
1690         vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N2);
1691         verifyMatrix(vectorYRef, vectorYD);
1692 
1693         mRS.finish();
1694         checkError();
1695     }
1696 
1697 
1698 
validateTRMV(Element e, int Uplo, int TransA, int Diag, Allocation A, Allocation X, int incX)1699     private boolean validateTRMV(Element e, int Uplo, int TransA, int Diag, Allocation A, Allocation X, int incX) {
1700         if (!validateUplo(Uplo)) {
1701             return false;
1702         }
1703         if (!validateTranspose(TransA)) {
1704             return false;
1705         }
1706         if (!validateDiag(Diag)) {
1707             return false;
1708         }
1709         int N = A.getType().getY();
1710         if (A.getType().getX() != N) {
1711             return false;
1712         }
1713         if (!A.getType().getElement().isCompatible(e) ||
1714             !X.getType().getElement().isCompatible(e)) {
1715             return false;
1716         }
1717         if (X.getType().getY() > 1) {
1718             return false;
1719         }
1720 
1721         if (incX <= 0) {
1722             return false;
1723         }
1724         int expectedXDim = 1 + (N - 1) * incX;
1725         if (X.getType().getX() != expectedXDim) {
1726             return false;
1727         }
1728         return true;
1729     }
1730 
xTRMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix)1731     private void xTRMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) {
1732         for (Allocation matA : mMatrix) {
1733             for (Allocation vecX : mMatrix) {
1734                 if (!validateVecInput(vecX)) {
1735                     continue;
1736                 }
1737                 Element elemA = matA.getType().getElement();
1738                 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) {
1739                     try {
1740                         if (elemA.isCompatible(Element.F32(mRS))) {
1741                             mBLAS.STRMV(Uplo, TransA, Diag, matA, vecX, incX);
1742                         } else if (elemA.isCompatible(Element.F64(mRS))) {
1743                             mBLAS.DTRMV(Uplo, TransA, Diag, matA, vecX, incX);
1744                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
1745                             mBLAS.CTRMV(Uplo, TransA, Diag, matA, vecX, incX);
1746                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
1747                             mBLAS.ZTRMV(Uplo, TransA, Diag, matA, vecX, incX);
1748                         }
1749                     } catch (RSRuntimeException e) {
1750                         fail("should NOT throw RSRuntimeException");
1751                     }
1752                 } else {
1753                     try {
1754                         mBLAS.STRMV(Uplo, TransA, Diag, matA, vecX, incX);
1755                         fail("should throw RSRuntimeException for STRMV");
1756                     } catch (RSRuntimeException e) {
1757                     }
1758                     try {
1759                         mBLAS.DTRMV(Uplo, TransA, Diag, matA, vecX, incX);
1760                         fail("should throw RSRuntimeException for DTRMV");
1761                     } catch (RSRuntimeException e) {
1762                     }
1763                     try {
1764                         mBLAS.CTRMV(Uplo, TransA, Diag, matA, vecX, incX);
1765                         fail("should throw RSRuntimeException for CTRMV");
1766                     } catch (RSRuntimeException e) {
1767                     }
1768                     try {
1769                         mBLAS.ZTRMV(Uplo, TransA, Diag, matA, vecX, incX);
1770                         fail("should throw RSRuntimeException for ZTRMV");
1771                     } catch (RSRuntimeException e) {
1772                     }
1773                 }
1774             }
1775         }
1776     }
1777 
L2_xTRMV_API(ArrayList<Allocation> mMatrix)1778     public void L2_xTRMV_API(ArrayList<Allocation> mMatrix) {
1779         for (int Uplo : mUplo) {
1780             for (int TransA : mTranspose) {
1781                 for (int Diag : mDiag) {
1782                     for (int incX : mInc) {
1783                         xTRMV_API_test(Uplo, TransA, Diag, incX, mMatrix);
1784                     }
1785                 }
1786             }
1787         }
1788     }
1789 
test_L2_STRMV_API()1790     public void test_L2_STRMV_API() {
1791         L2_xTRMV_API(mMatrixS);
1792     }
1793 
test_L2_DTRMV_API()1794     public void test_L2_DTRMV_API() {
1795         L2_xTRMV_API(mMatrixD);
1796     }
1797 
test_L2_CTRMV_API()1798     public void test_L2_CTRMV_API() {
1799         L2_xTRMV_API(mMatrixC);
1800     }
1801 
test_L2_ZTRMV_API()1802     public void test_L2_ZTRMV_API() {
1803         L2_xTRMV_API(mMatrixZ);
1804     }
1805 
test_L2_STRMV_Correctness()1806     public void test_L2_STRMV_Correctness() {
1807         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
1808         int uplo = ScriptIntrinsicBLAS.UPPER;
1809         int diag = ScriptIntrinsicBLAS.NON_UNIT;
1810         int incX = 1;
1811 
1812         // Populate input allocations
1813         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
1814         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
1815         matrixAS.copyFrom(mBLASData.L2_sTRMV_A_nn);
1816         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1);
1817 
1818         // Test for the default case: NO_TRANS
1819         mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX);
1820         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
1821         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN);
1822         verifyMatrix(vectorXRef, vectorXS);
1823 
1824         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
1825         trans = ScriptIntrinsicBLAS.TRANSPOSE;
1826         // Reload vector X, since it was overwritten by BLAS.
1827         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1);
1828         mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX);
1829         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UT);
1830         verifyMatrix(vectorXRef, vectorXS);
1831 
1832         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
1833         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1);
1834         mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX);
1835         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UH);
1836         verifyMatrix(vectorXRef, vectorXS);
1837 
1838         // Test for incX = 2;
1839         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
1840         incX = 2;
1841         int dimX = 1 + (mBLASData.dN - 1) * incX;
1842         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
1843         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n2);
1844 
1845         mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX);
1846         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
1847         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN2);
1848         verifyMatrix(vectorXRef, vectorXS);
1849 
1850         mRS.finish();
1851         checkError();
1852     }
1853 
test_L2_DTRMV_Correctness()1854     public void test_L2_DTRMV_Correctness() {
1855         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
1856         int uplo = ScriptIntrinsicBLAS.UPPER;
1857         int diag = ScriptIntrinsicBLAS.NON_UNIT;
1858         int incX = 1;
1859 
1860         // Populate input allocations
1861         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
1862         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
1863         matrixAD.copyFrom(mBLASData.L2_dTRMV_A_nn);
1864         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1);
1865 
1866         // Test for the default case: NO_TRANS
1867         mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX);
1868         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
1869         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN);
1870         verifyMatrix(vectorXRef, vectorXD);
1871 
1872         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
1873         trans = ScriptIntrinsicBLAS.TRANSPOSE;
1874         // Reload vector X, since it was overwritten by BLAS.
1875         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1);
1876         mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX);
1877         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UT);
1878         verifyMatrix(vectorXRef, vectorXD);
1879 
1880         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
1881         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1);
1882         mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX);
1883         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UH);
1884         verifyMatrix(vectorXRef, vectorXD);
1885 
1886         // Test for incX = 2;
1887         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
1888         incX = 2;
1889         int dimX = 1 + (mBLASData.dN - 1) * incX;
1890         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
1891         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n2);
1892 
1893         mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX);
1894         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
1895         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN2);
1896         verifyMatrix(vectorXRef, vectorXD);
1897 
1898         mRS.finish();
1899         checkError();
1900     }
1901 
test_L2_CTRMV_Correctness()1902     public void test_L2_CTRMV_Correctness() {
1903         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
1904         int uplo = ScriptIntrinsicBLAS.UPPER;
1905         int diag = ScriptIntrinsicBLAS.NON_UNIT;
1906         int incX = 1;
1907 
1908         // Populate input allocations
1909         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
1910         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
1911         matrixAC.copyFrom(mBLASData.L2_cTRMV_A_nn);
1912         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1);
1913 
1914         // Test for the default case: NO_TRANS
1915         mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX);
1916         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
1917         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN);
1918         verifyMatrix(vectorXRef, vectorXC);
1919 
1920         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
1921         trans = ScriptIntrinsicBLAS.TRANSPOSE;
1922         // Reload vector X, since it was overwritten by BLAS.
1923         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1);
1924         mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX);
1925         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UT);
1926         verifyMatrix(vectorXRef, vectorXC);
1927 
1928         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
1929         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1);
1930         mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX);
1931         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UH);
1932         verifyMatrix(vectorXRef, vectorXC);
1933 
1934         // Test for incX = 2;
1935         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
1936         incX = 2;
1937         int dimX = 1 + (mBLASData.dN - 1) * incX;
1938         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
1939         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n2);
1940 
1941         mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX);
1942         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
1943         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN2);
1944         verifyMatrix(vectorXRef, vectorXC);
1945 
1946         mRS.finish();
1947         checkError();
1948     }
1949 
test_L2_ZTRMV_Correctness()1950     public void test_L2_ZTRMV_Correctness() {
1951         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
1952         int uplo = ScriptIntrinsicBLAS.UPPER;
1953         int diag = ScriptIntrinsicBLAS.NON_UNIT;
1954         int incX = 1;
1955 
1956         // Populate input allocations
1957         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
1958         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
1959         matrixAZ.copyFrom(mBLASData.L2_zTRMV_A_nn);
1960         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1);
1961 
1962         // Test for the default case: NO_TRANS
1963         mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
1964         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
1965         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN);
1966         verifyMatrix(vectorXRef, vectorXZ);
1967 
1968         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
1969         trans = ScriptIntrinsicBLAS.TRANSPOSE;
1970         // Reload vector X, since it was overwritten by BLAS.
1971         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1);
1972         mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
1973         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UT);
1974         verifyMatrix(vectorXRef, vectorXZ);
1975 
1976         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
1977         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1);
1978         mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
1979         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UH);
1980         verifyMatrix(vectorXRef, vectorXZ);
1981 
1982         // Test for incX = 2;
1983         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
1984         incX = 2;
1985         int dimX = 1 + (mBLASData.dN - 1) * incX;
1986         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
1987         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n2);
1988 
1989         mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
1990         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
1991         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN2);
1992         verifyMatrix(vectorXRef, vectorXZ);
1993 
1994         mRS.finish();
1995         checkError();
1996     }
1997 
1998 
1999 
xTBMV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix)2000     private void xTBMV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix) {
2001         for (Allocation matA : mMatrix) {
2002             for (Allocation vecX : mMatrix) {
2003                 Element elemA = matA.getType().getElement();
2004                 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX) && K >= 0) {
2005                     try {
2006                         if (elemA.isCompatible(Element.F32(mRS))) {
2007                             mBLAS.STBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
2008                         } else if (elemA.isCompatible(Element.F64(mRS))) {
2009                             mBLAS.DTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
2010                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
2011                             mBLAS.CTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
2012                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
2013                             mBLAS.ZTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
2014                         }
2015                     } catch (RSRuntimeException e) {
2016                         fail("should NOT throw RSRuntimeException");
2017                     }
2018                 } else {
2019                     try {
2020                         mBLAS.STBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
2021                         fail("should throw RSRuntimeException for STBMV");
2022                     } catch (RSRuntimeException e) {
2023                     }
2024                     try {
2025                         mBLAS.DTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
2026                         fail("should throw RSRuntimeException for DTBMV");
2027                     } catch (RSRuntimeException e) {
2028                     }
2029                     try {
2030                         mBLAS.CTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
2031                         fail("should throw RSRuntimeException for CTBMV");
2032                     } catch (RSRuntimeException e) {
2033                     }
2034                     try {
2035                         mBLAS.ZTBMV(Uplo, TransA, Diag, K, matA, vecX, incX);
2036                         fail("should throw RSRuntimeException for ZTBMV");
2037                     } catch (RSRuntimeException e) {
2038                     }
2039                 }
2040             }
2041         }
2042     }
2043 
L2_xTBMV_API(ArrayList<Allocation> mMatrix)2044     public void L2_xTBMV_API(ArrayList<Allocation> mMatrix) {
2045         for (int Uplo : mUplo) {
2046             for (int TransA : mTranspose) {
2047                 for (int Diag : mDiag) {
2048                     for (int K : mK) {
2049                         for (int incX : mInc) {
2050                             xTBMV_API_test(Uplo, TransA, Diag, K, incX, mMatrix);
2051                         }
2052                     }
2053                 }
2054             }
2055         }
2056     }
2057 
test_L2_STBMV_API()2058     public void test_L2_STBMV_API() {
2059         L2_xTBMV_API(mMatrixS);
2060     }
2061 
test_L2_DTBMV_API()2062     public void test_L2_DTBMV_API() {
2063         L2_xTBMV_API(mMatrixD);
2064     }
2065 
test_L2_CTBMV_API()2066     public void test_L2_CTBMV_API() {
2067         L2_xTBMV_API(mMatrixC);
2068     }
2069 
test_L2_ZTBMV_API()2070     public void test_L2_ZTBMV_API() {
2071         L2_xTBMV_API(mMatrixZ);
2072     }
2073 
test_L2_STBMV_Correctness()2074     public void test_L2_STBMV_Correctness() {
2075         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2076         int uplo = ScriptIntrinsicBLAS.UPPER;
2077         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2078         int incX = 1;
2079 
2080         // Populate input allocations
2081         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
2082         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
2083         matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_sTBMV_A_nn);
2084         vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n1);
2085 
2086         // Test for the default case: NO_TRANS
2087         mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
2088         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
2089         vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UN);
2090         verifyMatrix(vectorXRef, vectorXS);
2091 
2092         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2093         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2094         // Reload vector X, since it was overwritten by BLAS.
2095         vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n1);
2096         mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
2097         vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UT);
2098         verifyMatrix(vectorXRef, vectorXS);
2099 
2100         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2101         vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n1);
2102         mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
2103         vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UH);
2104         verifyMatrix(vectorXRef, vectorXS);
2105 
2106         // Test for incX = 2;
2107         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2108         incX = 2;
2109         int dimX = 1 + (mBLASData.dN - 1) * incX;
2110         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
2111         vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n2);
2112 
2113         mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
2114         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
2115         vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UN2);
2116         verifyMatrix(vectorXRef, vectorXS);
2117 
2118         mRS.finish();
2119         checkError();
2120     }
2121 
test_L2_DTBMV_Correctness()2122     public void test_L2_DTBMV_Correctness() {
2123         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2124         int uplo = ScriptIntrinsicBLAS.UPPER;
2125         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2126         int incX = 1;
2127 
2128         // Populate input allocations
2129         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
2130         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
2131         matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_dTBMV_A_nn);
2132         vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n1);
2133 
2134         // Test for the default case: NO_TRANS
2135         mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
2136         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
2137         vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UN);
2138         verifyMatrix(vectorXRef, vectorXD);
2139 
2140         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2141         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2142         // Reload vector X, since it was overwritten by BLAS.
2143         vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n1);
2144         mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
2145         vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UT);
2146         verifyMatrix(vectorXRef, vectorXD);
2147 
2148         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2149         vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n1);
2150         mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
2151         vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UH);
2152         verifyMatrix(vectorXRef, vectorXD);
2153 
2154         // Test for incX = 2;
2155         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2156         incX = 2;
2157         int dimX = 1 + (mBLASData.dN - 1) * incX;
2158         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
2159         vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n2);
2160 
2161         mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
2162         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
2163         vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UN2);
2164         verifyMatrix(vectorXRef, vectorXD);
2165 
2166         mRS.finish();
2167         checkError();
2168     }
2169 
test_L2_CTBMV_Correctness()2170     public void test_L2_CTBMV_Correctness() {
2171         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2172         int uplo = ScriptIntrinsicBLAS.UPPER;
2173         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2174         int incX = 1;
2175 
2176         // Populate input allocations
2177         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
2178         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
2179         matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_cTBMV_A_nn);
2180         vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n1);
2181 
2182         // Test for the default case: NO_TRANS
2183         mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
2184         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
2185         vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UN);
2186         verifyMatrix(vectorXRef, vectorXC);
2187 
2188         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2189         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2190         // Reload vector X, since it was overwritten by BLAS.
2191         vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n1);
2192         mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
2193         vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UT);
2194         verifyMatrix(vectorXRef, vectorXC);
2195 
2196         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2197         vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n1);
2198         mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
2199         vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UH);
2200         verifyMatrix(vectorXRef, vectorXC);
2201 
2202         // Test for incX = 2;
2203         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2204         incX = 2;
2205         int dimX = 1 + (mBLASData.dN - 1) * incX;
2206         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
2207         vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n2);
2208 
2209         mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
2210         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
2211         vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UN2);
2212         verifyMatrix(vectorXRef, vectorXC);
2213 
2214         mRS.finish();
2215         checkError();
2216     }
2217 
test_L2_ZTBMV_Correctness()2218     public void test_L2_ZTBMV_Correctness() {
2219         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2220         int uplo = ScriptIntrinsicBLAS.UPPER;
2221         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2222         int incX = 1;
2223 
2224         // Populate input allocations
2225         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
2226         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
2227         matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_zTBMV_A_nn);
2228         vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n1);
2229 
2230         // Test for the default case: NO_TRANS
2231         mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
2232         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
2233         vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UN);
2234         verifyMatrix(vectorXRef, vectorXZ);
2235 
2236         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2237         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2238         // Reload vector X, since it was overwritten by BLAS.
2239         vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n1);
2240         mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
2241         vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UT);
2242         verifyMatrix(vectorXRef, vectorXZ);
2243 
2244         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2245         vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n1);
2246         mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
2247         vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UH);
2248         verifyMatrix(vectorXRef, vectorXZ);
2249 
2250         // Test for incX = 2;
2251         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2252         incX = 2;
2253         int dimX = 1 + (mBLASData.dN - 1) * incX;
2254         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
2255         vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n2);
2256 
2257         mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
2258         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
2259         vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UN2);
2260         verifyMatrix(vectorXRef, vectorXZ);
2261 
2262         mRS.finish();
2263         checkError();
2264     }
2265 
2266 
validateTPMV(Element e, int Uplo, int TransA, int Diag, Allocation Ap, Allocation X, int incX)2267     private boolean validateTPMV(Element e, int Uplo, int TransA, int Diag, Allocation Ap, Allocation X, int incX) {
2268         if (!validateUplo(Uplo)) {
2269             return false;
2270         }
2271         if (!validateTranspose(TransA)) {
2272             return false;
2273         }
2274         if (!validateDiag(Diag)) {
2275             return false;
2276         }
2277         if (!Ap.getType().getElement().isCompatible(e) ||
2278             !X.getType().getElement().isCompatible(e)) {
2279             return false;
2280         }
2281         if (X.getType().getY() > 1) {
2282             return false;
2283         }
2284 
2285         if (Ap.getType().getY() > 1) {
2286             return false;
2287         }
2288 
2289         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
2290         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
2291             return false;
2292         }
2293         if (incX <= 0) {
2294             return false;
2295         }
2296         int expectedXDim = 1 + (N - 1) * incX;
2297         if (X.getType().getX() != expectedXDim) {
2298             return false;
2299         }
2300 
2301         return true;
2302     }
2303 
xTPMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix)2304     private void xTPMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) {
2305         for (Allocation matA : mMatrix) {
2306             for (Allocation vecX : mMatrix) {
2307                 if (!validateVecInput(vecX)) {
2308                     continue;
2309                 }
2310                 Element elemA = matA.getType().getElement();
2311                 if (validateTPMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) {
2312                     try {
2313                         if (elemA.isCompatible(Element.F32(mRS))) {
2314                             mBLAS.STPMV(Uplo, TransA, Diag, matA, vecX, incX);
2315                         } else if (elemA.isCompatible(Element.F64(mRS))) {
2316                             mBLAS.DTPMV(Uplo, TransA, Diag, matA, vecX, incX);
2317                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
2318                             mBLAS.CTPMV(Uplo, TransA, Diag, matA, vecX, incX);
2319                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
2320                             mBLAS.ZTPMV(Uplo, TransA, Diag, matA, vecX, incX);
2321                         }
2322                     } catch (RSRuntimeException e) {
2323                         fail("should NOT throw RSRuntimeException");
2324                     }
2325                 } else {
2326                     try {
2327                         mBLAS.STPMV(Uplo, TransA, Diag, matA, vecX, incX);
2328                         fail("should throw RSRuntimeException for STPMV");
2329                     } catch (RSRuntimeException e) {
2330                     }
2331                     try {
2332                         mBLAS.DTPMV(Uplo, TransA, Diag, matA, vecX, incX);
2333                         fail("should throw RSRuntimeException for DTPMV");
2334                     } catch (RSRuntimeException e) {
2335                     }
2336                     try {
2337                         mBLAS.CTPMV(Uplo, TransA, Diag, matA, vecX, incX);
2338                         fail("should throw RSRuntimeException for CTPMV");
2339                     } catch (RSRuntimeException e) {
2340                     }
2341                     try {
2342                         mBLAS.ZTPMV(Uplo, TransA, Diag, matA, vecX, incX);
2343                         fail("should throw RSRuntimeException for ZTPMV");
2344                     } catch (RSRuntimeException e) {
2345                     }
2346                 }
2347             }
2348         }
2349     }
2350 
L2_xTPMV_API(ArrayList<Allocation> mMatrix)2351     public void L2_xTPMV_API(ArrayList<Allocation> mMatrix) {
2352         for (int Uplo : mUplo) {
2353             for (int TransA : mTranspose) {
2354                 for (int Diag : mDiag) {
2355                     for (int incX : mInc) {
2356                         xTPMV_API_test(Uplo, TransA, Diag, incX, mMatrix);
2357                     }
2358                 }
2359             }
2360         }
2361     }
2362 
test_L2_STPMV_API()2363     public void test_L2_STPMV_API() {
2364         L2_xTPMV_API(mMatrixS);
2365     }
2366 
test_L2_DTPMV_API()2367     public void test_L2_DTPMV_API() {
2368         L2_xTPMV_API(mMatrixD);
2369     }
2370 
test_L2_CTPMV_API()2371     public void test_L2_CTPMV_API() {
2372         L2_xTPMV_API(mMatrixC);
2373     }
2374 
test_L2_ZTPMV_API()2375     public void test_L2_ZTPMV_API() {
2376         L2_xTPMV_API(mMatrixZ);
2377     }
2378 
test_L2_STPMV_Correctness()2379     public void test_L2_STPMV_Correctness() {
2380         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2381         int uplo = ScriptIntrinsicBLAS.UPPER;
2382         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2383         int incX = 1;
2384 
2385         // Populate input allocations
2386         int N = mBLASData.dN;
2387         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
2388         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
2389         matrixAS.copyFrom(mBLASData.L2_sTRMV_A_nn_pu);
2390         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1);
2391 
2392         // Test for the default case: NO_TRANS
2393         mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX);
2394         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
2395         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN);
2396         verifyMatrix(vectorXRef, vectorXS);
2397 
2398         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2399         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2400         // Reload vector X, since it was overwritten by BLAS.
2401         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1);
2402         mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX);
2403         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UT);
2404         verifyMatrix(vectorXRef, vectorXS);
2405 
2406         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2407         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1);
2408         mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX);
2409         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UH);
2410         verifyMatrix(vectorXRef, vectorXS);
2411 
2412         // Test for incX = 2;
2413         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2414         incX = 2;
2415         int dimX = 1 + (N - 1) * incX;
2416         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
2417         vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n2);
2418 
2419         mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX);
2420         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
2421         vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN2);
2422         verifyMatrix(vectorXRef, vectorXS);
2423 
2424         mRS.finish();
2425         checkError();
2426     }
2427 
test_L2_DTPMV_Correctness()2428     public void test_L2_DTPMV_Correctness() {
2429         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2430         int uplo = ScriptIntrinsicBLAS.UPPER;
2431         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2432         int incX = 1;
2433 
2434         // Populate input allocations
2435         int N = mBLASData.dN;
2436         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
2437         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
2438         matrixAD.copyFrom(mBLASData.L2_dTRMV_A_nn_pu);
2439         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1);
2440 
2441         // Test for the default case: NO_TRANS
2442         mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX);
2443         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
2444         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN);
2445         verifyMatrix(vectorXRef, vectorXD);
2446 
2447         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2448         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2449         // Reload vector X, since it was overwritten by BLAS.
2450         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1);
2451         mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX);
2452         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UT);
2453         verifyMatrix(vectorXRef, vectorXD);
2454 
2455         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2456         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1);
2457         mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX);
2458         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UH);
2459         verifyMatrix(vectorXRef, vectorXD);
2460 
2461         // Test for incX = 2;
2462         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2463         incX = 2;
2464         int dimX = 1 + (N - 1) * incX;
2465         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
2466         vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n2);
2467 
2468         mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX);
2469         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
2470         vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN2);
2471         verifyMatrix(vectorXRef, vectorXD);
2472 
2473         mRS.finish();
2474         checkError();
2475     }
2476 
test_L2_CTPMV_Correctness()2477     public void test_L2_CTPMV_Correctness() {
2478         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2479         int uplo = ScriptIntrinsicBLAS.UPPER;
2480         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2481         int incX = 1;
2482 
2483         // Populate input allocations
2484         int N = mBLASData.dN;
2485         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
2486         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
2487         matrixAC.copyFrom(mBLASData.L2_cTRMV_A_nn_pu);
2488         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1);
2489 
2490         // Test for the default case: NO_TRANS
2491         mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX);
2492         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
2493         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN);
2494         verifyMatrix(vectorXRef, vectorXC);
2495 
2496         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2497         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2498         // Reload vector X, since it was overwritten by BLAS.
2499         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1);
2500         mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX);
2501         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UT);
2502         verifyMatrix(vectorXRef, vectorXC);
2503 
2504         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2505         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1);
2506         mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX);
2507         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UH);
2508         verifyMatrix(vectorXRef, vectorXC);
2509 
2510         // Test for incX = 2;
2511         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2512         incX = 2;
2513         int dimX = 1 + (N - 1) * incX;
2514         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
2515         vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n2);
2516 
2517         mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX);
2518         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
2519         vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN2);
2520         verifyMatrix(vectorXRef, vectorXC);
2521 
2522         mRS.finish();
2523         checkError();
2524     }
2525 
test_L2_ZTPMV_Correctness()2526     public void test_L2_ZTPMV_Correctness() {
2527         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2528         int uplo = ScriptIntrinsicBLAS.UPPER;
2529         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2530         int incX = 1;
2531 
2532         // Populate input allocations
2533         int N = mBLASData.dN;
2534         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
2535         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
2536         matrixAZ.copyFrom(mBLASData.L2_zTRMV_A_nn_pu);
2537         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1);
2538 
2539         // Test for the default case: NO_TRANS
2540         mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
2541         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
2542         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN);
2543         verifyMatrix(vectorXRef, vectorXZ);
2544 
2545         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2546         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2547         // Reload vector X, since it was overwritten by BLAS.
2548         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1);
2549         mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
2550         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UT);
2551         verifyMatrix(vectorXRef, vectorXZ);
2552 
2553         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2554         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1);
2555         mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
2556         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UH);
2557         verifyMatrix(vectorXRef, vectorXZ);
2558 
2559         // Test for incX = 2;
2560         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2561         incX = 2;
2562         int dimX = 1 + (N - 1) * incX;
2563         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
2564         vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n2);
2565 
2566         mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
2567         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
2568         vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN2);
2569         verifyMatrix(vectorXRef, vectorXZ);
2570 
2571         mRS.finish();
2572         checkError();
2573     }
2574 
2575 
xTRSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix)2576     private void xTRSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) {
2577         for (Allocation matA : mMatrix) {
2578             for (Allocation vecX : mMatrix) {
2579                 if (!validateVecInput(vecX)) {
2580                     continue;
2581                 }
2582                 Element elemA = matA.getType().getElement();
2583                 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) {
2584                     try {
2585                         if (elemA.isCompatible(Element.F32(mRS))) {
2586                             mBLAS.STRSV(Uplo, TransA, Diag, matA, vecX, incX);
2587                         } else if (elemA.isCompatible(Element.F64(mRS))) {
2588                             mBLAS.DTRSV(Uplo, TransA, Diag, matA, vecX, incX);
2589                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
2590                             mBLAS.CTRSV(Uplo, TransA, Diag, matA, vecX, incX);
2591                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
2592                             mBLAS.ZTRSV(Uplo, TransA, Diag, matA, vecX, incX);
2593                         }
2594                     } catch (RSRuntimeException e) {
2595                         fail("should NOT throw RSRuntimeException");
2596                     }
2597                 } else {
2598                     try {
2599                         mBLAS.STRSV(Uplo, TransA, Diag, matA, vecX, incX);
2600                         fail("should throw RSRuntimeException for STRSV");
2601                     } catch (RSRuntimeException e) {
2602                     }
2603                     try {
2604                         mBLAS.DTRSV(Uplo, TransA, Diag, matA, vecX, incX);
2605                         fail("should throw RSRuntimeException for DTRSV");
2606                     } catch (RSRuntimeException e) {
2607                     }
2608                     try {
2609                         mBLAS.CTRSV(Uplo, TransA, Diag, matA, vecX, incX);
2610                         fail("should throw RSRuntimeException for CTRSV");
2611                     } catch (RSRuntimeException e) {
2612                     }
2613                     try {
2614                         mBLAS.ZTRSV(Uplo, TransA, Diag, matA, vecX, incX);
2615                         fail("should throw RSRuntimeException for ZTRSV");
2616                     } catch (RSRuntimeException e) {
2617                     }
2618                 }
2619             }
2620         }
2621     }
2622 
L2_xTRSV_API(ArrayList<Allocation> mMatrix)2623     public void L2_xTRSV_API(ArrayList<Allocation> mMatrix) {
2624         for (int Uplo : mUplo) {
2625             for (int TransA : mTranspose) {
2626                 for (int Diag : mDiag) {
2627                     for (int incX : mInc) {
2628                         xTRSV_API_test(Uplo, TransA, Diag, incX, mMatrix);
2629                     }
2630                 }
2631             }
2632         }
2633     }
2634 
test_L2_STRSV_API()2635     public void test_L2_STRSV_API() {
2636         L2_xTRSV_API(mMatrixS);
2637     }
2638 
test_L2_DTRSV_API()2639     public void test_L2_DTRSV_API() {
2640         L2_xTRSV_API(mMatrixD);
2641     }
2642 
test_L2_CTRSV_API()2643     public void test_L2_CTRSV_API() {
2644         L2_xTRSV_API(mMatrixC);
2645     }
2646 
test_L2_ZTRSV_API()2647     public void test_L2_ZTRSV_API() {
2648         L2_xTRSV_API(mMatrixZ);
2649     }
2650 
test_L2_STRSV_Correctness()2651     public void test_L2_STRSV_Correctness() {
2652         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2653         int uplo = ScriptIntrinsicBLAS.UPPER;
2654         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2655         int incX = 1;
2656 
2657         // Populate input allocations
2658         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
2659         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
2660         matrixAS.copyFrom(mBLASData.L2_sTRSV_A_nn);
2661         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1);
2662 
2663         // Test for the default case: NO_TRANS
2664         mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX);
2665         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
2666         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN);
2667         verifyMatrix(vectorXRef, vectorXS);
2668 
2669         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2670         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2671         // Reload vector X, since it was overwritten by BLAS.
2672         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1);
2673         mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX);
2674         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UT);
2675         verifyMatrix(vectorXRef, vectorXS);
2676 
2677         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2678         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1);
2679         mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX);
2680         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UH);
2681         verifyMatrix(vectorXRef, vectorXS);
2682 
2683         // Test for incX = 2;
2684         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2685         incX = 2;
2686         int dimX = 1 + (mBLASData.dN - 1) * incX;
2687         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
2688         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n2);
2689 
2690         mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX);
2691         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
2692         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN2);
2693         verifyMatrix(vectorXRef, vectorXS);
2694 
2695         mRS.finish();
2696         checkError();
2697     }
2698 
test_L2_DTRSV_Correctness()2699     public void test_L2_DTRSV_Correctness() {
2700         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2701         int uplo = ScriptIntrinsicBLAS.UPPER;
2702         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2703         int incX = 1;
2704 
2705         // Populate input allocations
2706         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
2707         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
2708         matrixAD.copyFrom(mBLASData.L2_dTRSV_A_nn);
2709         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1);
2710 
2711         // Test for the default case: NO_TRANS
2712         mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX);
2713         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
2714         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN);
2715         verifyMatrix(vectorXRef, vectorXD);
2716 
2717         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2718         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2719         // Reload vector X, since it was overwritten by BLAS.
2720         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1);
2721         mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX);
2722         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UT);
2723         verifyMatrix(vectorXRef, vectorXD);
2724 
2725         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2726         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1);
2727         mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX);
2728         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UH);
2729         verifyMatrix(vectorXRef, vectorXD);
2730 
2731         // Test for incX = 2;
2732         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2733         incX = 2;
2734         int dimX = 1 + (mBLASData.dN - 1) * incX;
2735         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
2736         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n2);
2737 
2738         mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX);
2739         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
2740         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN2);
2741         verifyMatrix(vectorXRef, vectorXD);
2742 
2743         mRS.finish();
2744         checkError();
2745     }
2746 
test_L2_CTRSV_Correctness()2747     public void test_L2_CTRSV_Correctness() {
2748         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2749         int uplo = ScriptIntrinsicBLAS.UPPER;
2750         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2751         int incX = 1;
2752 
2753         // Populate input allocations
2754         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
2755         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
2756         matrixAC.copyFrom(mBLASData.L2_cTRSV_A_nn);
2757         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1);
2758 
2759         // Test for the default case: NO_TRANS
2760         mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX);
2761         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
2762         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN);
2763         verifyMatrix(vectorXRef, vectorXC);
2764 
2765         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2766         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2767         // Reload vector X, since it was overwritten by BLAS.
2768         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1);
2769         mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX);
2770         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UT);
2771         verifyMatrix(vectorXRef, vectorXC);
2772 
2773         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2774         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1);
2775         mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX);
2776         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UH);
2777         verifyMatrix(vectorXRef, vectorXC);
2778 
2779         // Test for incX = 2;
2780         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2781         incX = 2;
2782         int dimX = 1 + (mBLASData.dN - 1) * incX;
2783         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
2784         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n2);
2785 
2786         mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX);
2787         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
2788         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN2);
2789         verifyMatrix(vectorXRef, vectorXC);
2790 
2791         mRS.finish();
2792         checkError();
2793     }
2794 
test_L2_ZTRSV_Correctness()2795     public void test_L2_ZTRSV_Correctness() {
2796         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2797         int uplo = ScriptIntrinsicBLAS.UPPER;
2798         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2799         int incX = 1;
2800 
2801         // Populate input allocations
2802         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
2803         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
2804         matrixAZ.copyFrom(mBLASData.L2_zTRSV_A_nn);
2805         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1);
2806 
2807         // Test for the default case: NO_TRANS
2808         mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
2809         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
2810         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN);
2811         verifyMatrix(vectorXRef, vectorXZ);
2812 
2813         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2814         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2815         // Reload vector X, since it was overwritten by BLAS.
2816         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1);
2817         mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
2818         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UT);
2819         verifyMatrix(vectorXRef, vectorXZ);
2820 
2821         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2822         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1);
2823         mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
2824         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UH);
2825         verifyMatrix(vectorXRef, vectorXZ);
2826 
2827         // Test for incX = 2;
2828         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2829         incX = 2;
2830         int dimX = 1 + (mBLASData.dN - 1) * incX;
2831         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
2832         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n2);
2833 
2834         mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
2835         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
2836         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN2);
2837         verifyMatrix(vectorXRef, vectorXZ);
2838 
2839         mRS.finish();
2840         checkError();
2841     }
2842 
2843 
xTBSV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix)2844     private void xTBSV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix) {
2845         for (Allocation matA : mMatrix) {
2846             for (Allocation vecX : mMatrix) {
2847                 if (!validateVecInput(vecX)) {
2848                     continue;
2849                 }
2850                 Element elemA = matA.getType().getElement();
2851                 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX) && K >= 0) {
2852                     try {
2853                         if (elemA.isCompatible(Element.F32(mRS))) {
2854                             mBLAS.STBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
2855                         } else if (elemA.isCompatible(Element.F64(mRS))) {
2856                             mBLAS.DTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
2857                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
2858                             mBLAS.CTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
2859                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
2860                             mBLAS.ZTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
2861                         }
2862                     } catch (RSRuntimeException e) {
2863                         fail("should NOT throw RSRuntimeException");
2864                     }
2865                 } else {
2866                     try {
2867                         mBLAS.STBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
2868                         fail("should throw RSRuntimeException for STBSV");
2869                     } catch (RSRuntimeException e) {
2870                     }
2871                     try {
2872                         mBLAS.DTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
2873                         fail("should throw RSRuntimeException for DTBSV");
2874                     } catch (RSRuntimeException e) {
2875                     }
2876                     try {
2877                         mBLAS.CTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
2878                         fail("should throw RSRuntimeException for CTBSV");
2879                     } catch (RSRuntimeException e) {
2880                     }
2881                     try {
2882                         mBLAS.ZTBSV(Uplo, TransA, Diag, K, matA, vecX, incX);
2883                         fail("should throw RSRuntimeException for ZTBSV");
2884                     } catch (RSRuntimeException e) {
2885                     }
2886                 }
2887             }
2888         }
2889     }
2890 
L2_xTBSV_API(ArrayList<Allocation> mMatrix)2891     public void L2_xTBSV_API(ArrayList<Allocation> mMatrix) {
2892         for (int Uplo : mUplo) {
2893             for (int TransA : mTranspose) {
2894                 for (int Diag : mDiag) {
2895                     for (int K : mK) {
2896                         for (int incX : mInc) {
2897                             xTBSV_API_test(Uplo, TransA, Diag, K, incX, mMatrix);
2898                         }
2899                     }
2900                 }
2901             }
2902         }
2903     }
2904 
test_L2_STBSV_API()2905     public void test_L2_STBSV_API() {
2906         L2_xTBSV_API(mMatrixS);
2907     }
2908 
test_L2_DTBSV_API()2909     public void test_L2_DTBSV_API() {
2910         L2_xTBSV_API(mMatrixD);
2911     }
2912 
test_L2_CTBSV_API()2913     public void test_L2_CTBSV_API() {
2914         L2_xTBSV_API(mMatrixC);
2915     }
2916 
test_L2_ZTBSV_API()2917     public void test_L2_ZTBSV_API() {
2918         L2_xTBSV_API(mMatrixZ);
2919     }
2920 
test_L2_STBSV_Correctness()2921     public void test_L2_STBSV_Correctness() {
2922         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2923         int uplo = ScriptIntrinsicBLAS.UPPER;
2924         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2925         int incX = 1;
2926 
2927         // Populate input allocations
2928         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
2929         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
2930         matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_sTBSV_A_nn);
2931         vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n1);
2932 
2933         // Test for the default case: NO_TRANS
2934         mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
2935         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
2936         vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UN);
2937         verifyMatrix(vectorXRef, vectorXS);
2938 
2939         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2940         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2941         // Reload vector X, since it was overwritten by BLAS.
2942         vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n1);
2943         mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
2944         vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UT);
2945         verifyMatrix(vectorXRef, vectorXS);
2946 
2947         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2948         vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n1);
2949         mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
2950         vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UH);
2951         verifyMatrix(vectorXRef, vectorXS);
2952 
2953         // Test for incX = 2;
2954         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2955         incX = 2;
2956         int dimX = 1 + (mBLASData.dN - 1) * incX;
2957         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
2958         vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n2);
2959 
2960         mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX);
2961         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
2962         vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UN2);
2963         verifyMatrix(vectorXRef, vectorXS);
2964 
2965         mRS.finish();
2966         checkError();
2967     }
2968 
test_L2_DTBSV_Correctness()2969     public void test_L2_DTBSV_Correctness() {
2970         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
2971         int uplo = ScriptIntrinsicBLAS.UPPER;
2972         int diag = ScriptIntrinsicBLAS.NON_UNIT;
2973         int incX = 1;
2974 
2975         // Populate input allocations
2976         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
2977         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
2978         matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_dTBSV_A_nn);
2979         vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n1);
2980 
2981         // Test for the default case: NO_TRANS
2982         mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
2983         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
2984         vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UN);
2985         verifyMatrix(vectorXRef, vectorXD);
2986 
2987         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
2988         trans = ScriptIntrinsicBLAS.TRANSPOSE;
2989         // Reload vector X, since it was overwritten by BLAS.
2990         vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n1);
2991         mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
2992         vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UT);
2993         verifyMatrix(vectorXRef, vectorXD);
2994 
2995         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
2996         vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n1);
2997         mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
2998         vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UH);
2999         verifyMatrix(vectorXRef, vectorXD);
3000 
3001         // Test for incX = 2;
3002         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3003         incX = 2;
3004         int dimX = 1 + (mBLASData.dN - 1) * incX;
3005         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
3006         vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n2);
3007 
3008         mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX);
3009         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
3010         vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UN2);
3011         verifyMatrix(vectorXRef, vectorXD);
3012 
3013         mRS.finish();
3014         checkError();
3015     }
3016 
test_L2_CTBSV_Correctness()3017     public void test_L2_CTBSV_Correctness() {
3018         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3019         int uplo = ScriptIntrinsicBLAS.UPPER;
3020         int diag = ScriptIntrinsicBLAS.NON_UNIT;
3021         int incX = 1;
3022 
3023         // Populate input allocations
3024         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
3025         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
3026         matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_cTBSV_A_nn);
3027         vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n1);
3028 
3029         // Test for the default case: NO_TRANS
3030         mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
3031         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
3032         vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UN);
3033         verifyMatrix(vectorXRef, vectorXC);
3034 
3035         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
3036         trans = ScriptIntrinsicBLAS.TRANSPOSE;
3037         // Reload vector X, since it was overwritten by BLAS.
3038         vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n1);
3039         mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
3040         vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UT);
3041         verifyMatrix(vectorXRef, vectorXC);
3042 
3043         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
3044         vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n1);
3045         mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
3046         vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UH);
3047         verifyMatrix(vectorXRef, vectorXC);
3048 
3049         // Test for incX = 2;
3050         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3051         incX = 2;
3052         int dimX = 1 + (mBLASData.dN - 1) * incX;
3053         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
3054         vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n2);
3055 
3056         mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX);
3057         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
3058         vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UN2);
3059         verifyMatrix(vectorXRef, vectorXC);
3060 
3061         mRS.finish();
3062         checkError();
3063     }
3064 
test_L2_ZTBSV_Correctness()3065     public void test_L2_ZTBSV_Correctness() {
3066         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3067         int uplo = ScriptIntrinsicBLAS.UPPER;
3068         int diag = ScriptIntrinsicBLAS.NON_UNIT;
3069         int incX = 1;
3070 
3071         // Populate input allocations
3072         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
3073         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
3074         matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_zTBSV_A_nn);
3075         vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n1);
3076 
3077         // Test for the default case: NO_TRANS
3078         mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
3079         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
3080         vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UN);
3081         verifyMatrix(vectorXRef, vectorXZ);
3082 
3083         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
3084         trans = ScriptIntrinsicBLAS.TRANSPOSE;
3085         // Reload vector X, since it was overwritten by BLAS.
3086         vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n1);
3087         mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
3088         vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UT);
3089         verifyMatrix(vectorXRef, vectorXZ);
3090 
3091         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
3092         vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n1);
3093         mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
3094         vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UH);
3095         verifyMatrix(vectorXRef, vectorXZ);
3096 
3097         // Test for incX = 2;
3098         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3099         incX = 2;
3100         int dimX = 1 + (mBLASData.dN - 1) * incX;
3101         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
3102         vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n2);
3103 
3104         mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX);
3105         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
3106         vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UN2);
3107         verifyMatrix(vectorXRef, vectorXZ);
3108 
3109         mRS.finish();
3110         checkError();
3111     }
3112 
3113 
xTPSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix)3114     private void xTPSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) {
3115         for (Allocation matA : mMatrix) {
3116             for (Allocation vecX : mMatrix) {
3117                 if (!validateVecInput(vecX)) {
3118                     continue;
3119                 }
3120                 Element elemA = matA.getType().getElement();
3121                 if (validateTPMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) {
3122                     try {
3123                         if (elemA.isCompatible(Element.F32(mRS))) {
3124                             mBLAS.STPSV(Uplo, TransA, Diag, matA, vecX, incX);
3125                         } else if (elemA.isCompatible(Element.F64(mRS))) {
3126                             mBLAS.DTPSV(Uplo, TransA, Diag, matA, vecX, incX);
3127                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
3128                             mBLAS.CTPSV(Uplo, TransA, Diag, matA, vecX, incX);
3129                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
3130                             mBLAS.ZTPSV(Uplo, TransA, Diag, matA, vecX, incX);
3131                         }
3132                     } catch (RSRuntimeException e) {
3133                         fail("should NOT throw RSRuntimeException");
3134                     }
3135                 } else {
3136                     try {
3137                         mBLAS.STPSV(Uplo, TransA, Diag, matA, vecX, incX);
3138                         fail("should throw RSRuntimeException for STPSV");
3139                     } catch (RSRuntimeException e) {
3140                     }
3141                     try {
3142                         mBLAS.DTPSV(Uplo, TransA, Diag, matA, vecX, incX);
3143                         fail("should throw RSRuntimeException for DTPSV");
3144                     } catch (RSRuntimeException e) {
3145                     }
3146                     try {
3147                         mBLAS.CTPSV(Uplo, TransA, Diag, matA, vecX, incX);
3148                         fail("should throw RSRuntimeException for CTPSV");
3149                     } catch (RSRuntimeException e) {
3150                     }
3151                     try {
3152                         mBLAS.ZTPSV(Uplo, TransA, Diag, matA, vecX, incX);
3153                         fail("should throw RSRuntimeException for ZTPSV");
3154                     } catch (RSRuntimeException e) {
3155                     }
3156                 }
3157             }
3158         }
3159     }
3160 
L2_xTPSV_API(ArrayList<Allocation> mMatrix)3161     public void L2_xTPSV_API(ArrayList<Allocation> mMatrix) {
3162         for (int Uplo : mUplo) {
3163             for (int TransA : mTranspose) {
3164                 for (int Diag : mDiag) {
3165                     for (int incX : mInc) {
3166                         xTPSV_API_test(Uplo, TransA, Diag, incX, mMatrix);
3167                     }
3168                 }
3169             }
3170         }
3171     }
3172 
test_L2_STPSV_API()3173     public void test_L2_STPSV_API() {
3174         L2_xTPSV_API(mMatrixS);
3175     }
3176 
test_L2_DTPSV_API()3177     public void test_L2_DTPSV_API() {
3178         L2_xTPSV_API(mMatrixD);
3179     }
3180 
test_L2_CTPSV_API()3181     public void test_L2_CTPSV_API() {
3182         L2_xTPSV_API(mMatrixC);
3183     }
3184 
test_L2_ZTPSV_API()3185     public void test_L2_ZTPSV_API() {
3186         L2_xTPSV_API(mMatrixZ);
3187     }
3188 
test_L2_STPSV_Correctness()3189     public void test_L2_STPSV_Correctness() {
3190         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3191         int uplo = ScriptIntrinsicBLAS.UPPER;
3192         int diag = ScriptIntrinsicBLAS.NON_UNIT;
3193         int incX = 1;
3194 
3195         // Populate input allocations
3196         int N = mBLASData.dN;
3197         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
3198         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
3199         matrixAS.copyFrom(mBLASData.L2_sTRSV_A_nn_pu);
3200         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1);
3201 
3202         // Test for the default case: NO_TRANS
3203         mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX);
3204         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
3205         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN);
3206         verifyMatrix(vectorXRef, vectorXS);
3207 
3208         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
3209         trans = ScriptIntrinsicBLAS.TRANSPOSE;
3210         // Reload vector X, since it was overwritten by BLAS.
3211         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1);
3212         mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX);
3213         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UT);
3214         verifyMatrix(vectorXRef, vectorXS);
3215 
3216         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
3217         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1);
3218         mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX);
3219         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UH);
3220         verifyMatrix(vectorXRef, vectorXS);
3221 
3222         // Test for incX = 2;
3223         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3224         incX = 2;
3225         int dimX = 1 + (N - 1) * incX;
3226         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
3227         vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n2);
3228 
3229         mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX);
3230         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
3231         vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN2);
3232         verifyMatrix(vectorXRef, vectorXS);
3233 
3234         mRS.finish();
3235         checkError();
3236     }
3237 
test_L2_DTPSV_Correctness()3238     public void test_L2_DTPSV_Correctness() {
3239         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3240         int uplo = ScriptIntrinsicBLAS.UPPER;
3241         int diag = ScriptIntrinsicBLAS.NON_UNIT;
3242         int incX = 1;
3243 
3244         // Populate input allocations
3245         int N = mBLASData.dN;
3246         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
3247         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
3248         matrixAD.copyFrom(mBLASData.L2_dTRSV_A_nn_pu);
3249         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1);
3250 
3251         // Test for the default case: NO_TRANS
3252         mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX);
3253         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
3254         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN);
3255         verifyMatrix(vectorXRef, vectorXD);
3256 
3257         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
3258         trans = ScriptIntrinsicBLAS.TRANSPOSE;
3259         // Reload vector X, since it was overwritten by BLAS.
3260         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1);
3261         mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX);
3262         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UT);
3263         verifyMatrix(vectorXRef, vectorXD);
3264 
3265         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
3266         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1);
3267         mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX);
3268         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UH);
3269         verifyMatrix(vectorXRef, vectorXD);
3270 
3271         // Test for incX = 2;
3272         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3273         incX = 2;
3274         int dimX = 1 + (N - 1) * incX;
3275         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
3276         vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n2);
3277 
3278         mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX);
3279         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
3280         vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN2);
3281         verifyMatrix(vectorXRef, vectorXD);
3282 
3283         mRS.finish();
3284         checkError();
3285     }
3286 
test_L2_CTPSV_Correctness()3287     public void test_L2_CTPSV_Correctness() {
3288         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3289         int uplo = ScriptIntrinsicBLAS.UPPER;
3290         int diag = ScriptIntrinsicBLAS.NON_UNIT;
3291         int incX = 1;
3292 
3293         // Populate input allocations
3294         int N = mBLASData.dN;
3295         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
3296         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
3297         matrixAC.copyFrom(mBLASData.L2_cTRSV_A_nn_pu);
3298         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1);
3299 
3300         // Test for the default case: NO_TRANS
3301         mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX);
3302         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
3303         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN);
3304         verifyMatrix(vectorXRef, vectorXC);
3305 
3306         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
3307         trans = ScriptIntrinsicBLAS.TRANSPOSE;
3308         // Reload vector X, since it was overwritten by BLAS.
3309         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1);
3310         mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX);
3311         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UT);
3312         verifyMatrix(vectorXRef, vectorXC);
3313 
3314         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
3315         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1);
3316         mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX);
3317         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UH);
3318         verifyMatrix(vectorXRef, vectorXC);
3319 
3320         // Test for incX = 2;
3321         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3322         incX = 2;
3323         int dimX = 1 + (N - 1) * incX;
3324         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
3325         vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n2);
3326 
3327         mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX);
3328         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
3329         vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN2);
3330         verifyMatrix(vectorXRef, vectorXC);
3331 
3332         mRS.finish();
3333         checkError();
3334     }
3335 
test_L2_ZTPSV_Correctness()3336     public void test_L2_ZTPSV_Correctness() {
3337         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3338         int uplo = ScriptIntrinsicBLAS.UPPER;
3339         int diag = ScriptIntrinsicBLAS.NON_UNIT;
3340         int incX = 1;
3341 
3342         // Populate input allocations
3343         int N = mBLASData.dN;
3344         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
3345         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
3346         matrixAZ.copyFrom(mBLASData.L2_zTRSV_A_nn_pu);
3347         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1);
3348 
3349         // Test for the default case: NO_TRANS
3350         mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
3351         Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
3352         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN);
3353         verifyMatrix(vectorXRef, vectorXZ);
3354 
3355         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
3356         trans = ScriptIntrinsicBLAS.TRANSPOSE;
3357         // Reload vector X, since it was overwritten by BLAS.
3358         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1);
3359         mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
3360         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UT);
3361         verifyMatrix(vectorXRef, vectorXZ);
3362 
3363         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
3364         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1);
3365         mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
3366         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UH);
3367         verifyMatrix(vectorXRef, vectorXZ);
3368 
3369         // Test for incX = 2;
3370         trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
3371         incX = 2;
3372         int dimX = 1 + (N - 1) * incX;
3373         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
3374         vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n2);
3375 
3376         mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX);
3377         vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
3378         vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN2);
3379         verifyMatrix(vectorXRef, vectorXZ);
3380 
3381         mRS.finish();
3382         checkError();
3383     }
3384 
3385 
validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)3386     private boolean validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
3387         if (!A.getType().getElement().isCompatible(e) ||
3388             !X.getType().getElement().isCompatible(e) ||
3389             !Y.getType().getElement().isCompatible(e) ) {
3390             return false;
3391         }
3392 
3393         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
3394             return false;
3395         }
3396 
3397         int M = A.getType().getY();
3398         int N = A.getType().getX();
3399 
3400         if (N < 1 || M < 1) {
3401             return false;
3402         }
3403         if (incX <= 0 || incY <= 0) {
3404             return false;
3405         }
3406         int expectedXDim = 1 + (M - 1) * incX;
3407         if (X.getType().getX() != expectedXDim) {
3408             return false;
3409         }
3410         int expectedYDim = 1 + (N - 1) * incY;
3411         if (Y.getType().getX() != expectedYDim) {
3412             return false;
3413         }
3414         return true;
3415     }
3416 
3417 
xGER_API_test(int incX, int incY, ArrayList<Allocation> mMatrix)3418     private void xGER_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) {
3419         for (Allocation matA : mMatrix) {
3420             for (Allocation vecX : mMatrix) {
3421                 if (!validateVecInput(vecX)) {
3422                     continue;
3423                 }
3424                 for (Allocation vecY : mMatrix) {
3425                     if (!validateVecInput(vecY)) {
3426                         continue;
3427                     }
3428                     Element elemA = matA.getType().getElement();
3429                     if (validateGER(elemA, vecX, incX, vecY, incY, matA)) {
3430                         try {
3431                             if (elemA.isCompatible(Element.F32(mRS))) {
3432                                 mBLAS.SGER(alphaS, vecX, incX, vecY, incY, matA);
3433                             } else if (elemA.isCompatible(Element.F64(mRS))) {
3434                                 mBLAS.DGER(alphaD, vecX, incX, vecY, incY, matA);
3435                             }
3436                         } catch (RSRuntimeException e) {
3437                             fail("should NOT throw RSRuntimeException");
3438                         }
3439                     } else {
3440                         try {
3441                             mBLAS.SGER(alphaS, vecX, incX, vecY, incY, matA);
3442                             fail("should throw RSRuntimeException for SGER");
3443                         } catch (RSRuntimeException e) {
3444                         }
3445                         try {
3446                             mBLAS.DGER(alphaD, vecX, incX, vecY, incY, matA);
3447                             fail("should throw RSRuntimeException for DGER");
3448                         } catch (RSRuntimeException e) {
3449                         }
3450                     }
3451                 }
3452             }
3453         }
3454     }
3455 
L2_xGER_API(ArrayList<Allocation> mMatrix)3456     private void L2_xGER_API(ArrayList<Allocation> mMatrix) {
3457         for (int incX : mInc) {
3458             for (int incY : mInc) {
3459                 xGERU_API_test(incX, incY, mMatrix);
3460             }
3461         }
3462     }
3463 
test_L2_SGER_API()3464     public void test_L2_SGER_API() {
3465         L2_xGER_API(mMatrixS);
3466     }
3467 
test_L2_DGER_API()3468     public void test_L2_DGER_API() {
3469         L2_xGER_API(mMatrixD);
3470     }
3471 
test_L2_SGER_Correctness()3472     public void test_L2_SGER_Correctness() {
3473         int incX = 1;
3474         int incY = 1;
3475 
3476         // Populate input allocations
3477         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
3478         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1));
3479         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
3480         matrixAS.copyFrom(mBLASData.L2_sGER_A_mn);
3481         vectorXS.copyFrom(mBLASData.L2_sGER_x_m1);
3482         vectorYS.copyFrom(mBLASData.L2_sGER_y_n1);
3483 
3484         // Test for the default case: NO_TRANS
3485         mBLAS.SGER(alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
3486         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
3487         matrixARef.copyFrom(mBLASData.L2_sGER_o_N);
3488         verifyMatrix(matrixARef, matrixAS);
3489 
3490         // Test for incX = 2 & incY = 3;
3491         incX = 2;
3492         incY = 3;
3493         int dimX = 1 + (mBLASData.dM - 1) * incX;
3494         int dimY = 1 + (mBLASData.dN - 1) * incY;
3495         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
3496         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
3497         vectorXS.copyFrom(mBLASData.L2_sGER_x_m2);
3498         vectorYS.copyFrom(mBLASData.L2_sGER_y_n2);
3499         matrixAS.copyFrom(mBLASData.L2_sGER_A_mn);
3500 
3501         mBLAS.SGER(alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
3502         verifyMatrix(matrixARef, matrixAS);
3503 
3504         mRS.finish();
3505         checkError();
3506     }
3507 
test_L2_DGER_Correctness()3508     public void test_L2_DGER_Correctness() {
3509         int incX = 1;
3510         int incY = 1;
3511 
3512         // Populate input allocations
3513         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
3514         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1));
3515         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
3516         matrixAD.copyFrom(mBLASData.L2_dGER_A_mn);
3517         vectorXD.copyFrom(mBLASData.L2_dGER_x_m1);
3518         vectorYD.copyFrom(mBLASData.L2_dGER_y_n1);
3519 
3520         // Test for the default case: NO_TRANS
3521         mBLAS.DGER(alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
3522         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
3523         matrixARef.copyFrom(mBLASData.L2_dGER_o_N);
3524         verifyMatrix(matrixARef, matrixAD);
3525 
3526         // Test for incX = 2 & incY = 3;
3527         incX = 2;
3528         incY = 3;
3529         int dimX = 1 + (mBLASData.dM - 1) * incX;
3530         int dimY = 1 + (mBLASData.dN - 1) * incY;
3531         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
3532         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
3533         vectorXD.copyFrom(mBLASData.L2_dGER_x_m2);
3534         vectorYD.copyFrom(mBLASData.L2_dGER_y_n2);
3535         matrixAD.copyFrom(mBLASData.L2_dGER_A_mn);
3536 
3537         mBLAS.DGER(alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
3538         verifyMatrix(matrixARef, matrixAD);
3539 
3540         mRS.finish();
3541         checkError();
3542     }
3543 
3544 
validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)3545     private boolean validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
3546         if (!A.getType().getElement().isCompatible(e) ||
3547             !X.getType().getElement().isCompatible(e) ||
3548             !Y.getType().getElement().isCompatible(e)) {
3549             return false;
3550         }
3551         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
3552             return false;
3553         }
3554 
3555         int M = A.getType().getY();
3556         int N = A.getType().getX();
3557         if (incX <= 0 || incY <= 0) {
3558             return false;
3559         }
3560         int expectedXDim = 1 + (M - 1) * incX;
3561         if (X.getType().getX() != expectedXDim) {
3562             return false;
3563         }
3564         int expectedYDim = 1 + (N - 1) * incY;
3565         if (Y.getType().getX() != expectedYDim) {
3566             return false;
3567         }
3568         return true;
3569     }
3570 
xGERU_API_test(int incX, int incY, ArrayList<Allocation> mMatrix)3571     private void xGERU_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) {
3572         for (Allocation matA : mMatrix) {
3573             for (Allocation vecX : mMatrix) {
3574                 if (!validateVecInput(vecX)) {
3575                     continue;
3576                 }
3577                 for (Allocation vecY : mMatrix) {
3578                     if (!validateVecInput(vecY)) {
3579                         continue;
3580                     }
3581                     Element elemA = matA.getType().getElement();
3582                     if (validateGERU(elemA, vecX, incX, vecY, incY, matA)) {
3583                         try {
3584                             if (elemA.isCompatible(Element.F32_2(mRS))) {
3585                                 mBLAS.CGERU(alphaC, vecX, incX, vecY, incY, matA);
3586                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
3587                                 mBLAS.ZGERU(alphaZ, vecX, incX, vecY, incY, matA);
3588                             }
3589                         } catch (RSRuntimeException e) {
3590                             fail("should NOT throw RSRuntimeException");
3591                         }
3592                     } else {
3593                         try {
3594                             mBLAS.CGERU(alphaC, vecX, incX, vecY, incY, matA);
3595                             fail("should throw RSRuntimeException for CGERU");
3596                         } catch (RSRuntimeException e) {
3597                         }
3598                         try {
3599                             mBLAS.ZGERU(alphaZ, vecX, incX, vecY, incY, matA);
3600                             fail("should throw RSRuntimeException for ZGERU");
3601                         } catch (RSRuntimeException e) {
3602                         }
3603                     }
3604                 }
3605             }
3606         }
3607     }
3608 
L2_xGERU_API(ArrayList<Allocation> mMatrix)3609     private void L2_xGERU_API(ArrayList<Allocation> mMatrix) {
3610         for (int incX : mInc) {
3611             for (int incY : mInc) {
3612                 xGERU_API_test(incX, incY, mMatrix);
3613             }
3614         }
3615     }
3616 
test_L2_CGERU_API()3617     public void test_L2_CGERU_API() {
3618         L2_xGERU_API(mMatrixC);
3619     }
3620 
test_L2_ZGERU_API()3621     public void test_L2_ZGERU_API() {
3622         L2_xGERU_API(mMatrixZ);
3623     }
3624 
test_L2_CGERU_Correctness()3625     public void test_L2_CGERU_Correctness() {
3626         int incX = 1;
3627         int incY = 1;
3628 
3629         // Populate input allocations
3630         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
3631         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1));
3632         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
3633         matrixAC.copyFrom(mBLASData.L2_cGERU_A_mn);
3634         vectorXC.copyFrom(mBLASData.L2_cGERU_x_m1);
3635         vectorYC.copyFrom(mBLASData.L2_cGERU_y_n1);
3636 
3637         // Test for the default case: NO_TRANS
3638         mBLAS.CGERU(alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
3639         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
3640         matrixARef.copyFrom(mBLASData.L2_cGERU_o_N);
3641         verifyMatrix(matrixARef, matrixAC);
3642 
3643         // Test for incX = 2 & incY = 3;
3644         incX = 2;
3645         incY = 3;
3646         int dimX = 1 + (mBLASData.dM - 1) * incX;
3647         int dimY = 1 + (mBLASData.dN - 1) * incY;
3648         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
3649         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
3650         vectorXC.copyFrom(mBLASData.L2_cGERU_x_m2);
3651         vectorYC.copyFrom(mBLASData.L2_cGERU_y_n2);
3652         matrixAC.copyFrom(mBLASData.L2_cGERU_A_mn);
3653 
3654         mBLAS.CGERU(alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
3655         verifyMatrix(matrixARef, matrixAC);
3656 
3657         mRS.finish();
3658         checkError();
3659     }
3660 
test_L2_ZGERU_Correctness()3661     public void test_L2_ZGERU_Correctness() {
3662         int incX = 1;
3663         int incY = 1;
3664 
3665         // Populate input allocations
3666         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
3667         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1));
3668         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
3669         matrixAZ.copyFrom(mBLASData.L2_zGERU_A_mn);
3670         vectorXZ.copyFrom(mBLASData.L2_zGERU_x_m1);
3671         vectorYZ.copyFrom(mBLASData.L2_zGERU_y_n1);
3672 
3673         // Test for the default case: NO_TRANS
3674         mBLAS.ZGERU(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
3675         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
3676         matrixARef.copyFrom(mBLASData.L2_zGERU_o_N);
3677         verifyMatrix(matrixARef, matrixAZ);
3678 
3679         // Test for incX = 2 & incY = 3;
3680         incX = 2;
3681         incY = 3;
3682         int dimX = 1 + (mBLASData.dM - 1) * incX;
3683         int dimY = 1 + (mBLASData.dN - 1) * incY;
3684         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
3685         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
3686         vectorXZ.copyFrom(mBLASData.L2_zGERU_x_m2);
3687         vectorYZ.copyFrom(mBLASData.L2_zGERU_y_n2);
3688         matrixAZ.copyFrom(mBLASData.L2_zGERU_A_mn);
3689 
3690         mBLAS.ZGERU(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
3691         verifyMatrix(matrixARef, matrixAZ);
3692 
3693         mRS.finish();
3694         checkError();
3695     }
3696 
3697 
3698 
xGERC_API_test(int incX, int incY, ArrayList<Allocation> mMatrix)3699     private void xGERC_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) {
3700         for (Allocation matA : mMatrix) {
3701             for (Allocation vecX : mMatrix) {
3702                 if (!validateVecInput(vecX)) {
3703                     continue;
3704                 }
3705                 for (Allocation vecY : mMatrix) {
3706                     if (!validateVecInput(vecY)) {
3707                         continue;
3708                     }
3709                     Element elemA = matA.getType().getElement();
3710                     if (validateGERU(elemA, vecX, incX, vecY, incY, matA)) {
3711                         try {
3712                             if (elemA.isCompatible(Element.F32_2(mRS))) {
3713                                 mBLAS.CGERC(alphaC, vecX, incX, vecY, incY, matA);
3714                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
3715                                 mBLAS.ZGERC(alphaZ, vecX, incX, vecY, incY, matA);
3716                             }
3717                         } catch (RSRuntimeException e) {
3718                             fail("should NOT throw RSRuntimeException");
3719                         }
3720                     } else {
3721                         try {
3722                             mBLAS.CGERC(alphaC, vecX, incX, vecY, incY, matA);
3723                             fail("should throw RSRuntimeException for CGERC");
3724                         } catch (RSRuntimeException e) {
3725                         }
3726                         try {
3727                             mBLAS.ZGERC(alphaZ, vecX, incX, vecY, incY, matA);
3728                             fail("should throw RSRuntimeException for ZGERC");
3729                         } catch (RSRuntimeException e) {
3730                         }
3731                     }
3732                 }
3733             }
3734         }
3735     }
3736 
L2_xGERC_API(ArrayList<Allocation> mMatrix)3737     private void L2_xGERC_API(ArrayList<Allocation> mMatrix) {
3738         for (int incX : mInc) {
3739             for (int incY : mInc) {
3740                 xGERC_API_test(incX, incY, mMatrix);
3741             }
3742         }
3743     }
3744 
test_L2_CGERC_API()3745     public void test_L2_CGERC_API() {
3746         L2_xGERC_API(mMatrixC);
3747     }
3748 
test_L2_ZGERC_API()3749     public void test_L2_ZGERC_API() {
3750         L2_xGERC_API(mMatrixZ);
3751     }
3752 
test_L2_CGERC_Correctness()3753     public void test_L2_CGERC_Correctness() {
3754         int incX = 1;
3755         int incY = 1;
3756 
3757         // Populate input allocations
3758         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
3759         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1));
3760         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
3761         matrixAC.copyFrom(mBLASData.L2_cGERC_A_mn);
3762         vectorXC.copyFrom(mBLASData.L2_cGERC_x_m1);
3763         vectorYC.copyFrom(mBLASData.L2_cGERC_y_n1);
3764 
3765         // Test for the default case: NO_TRANS
3766         mBLAS.CGERC(alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
3767         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
3768         matrixARef.copyFrom(mBLASData.L2_cGERC_o_N);
3769         verifyMatrix(matrixARef, matrixAC);
3770 
3771         // Test for incX = 2 & incY = 3;
3772         incX = 2;
3773         incY = 3;
3774         int dimX = 1 + (mBLASData.dM - 1) * incX;
3775         int dimY = 1 + (mBLASData.dN - 1) * incY;
3776         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
3777         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
3778         vectorXC.copyFrom(mBLASData.L2_cGERC_x_m2);
3779         vectorYC.copyFrom(mBLASData.L2_cGERC_y_n2);
3780         matrixAC.copyFrom(mBLASData.L2_cGERC_A_mn);
3781 
3782         mBLAS.CGERC(alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
3783         verifyMatrix(matrixARef, matrixAC);
3784 
3785         mRS.finish();
3786         checkError();
3787     }
3788 
test_L2_ZGERC_Correctness()3789     public void test_L2_ZGERC_Correctness() {
3790         int incX = 1;
3791         int incY = 1;
3792 
3793         // Populate input allocations
3794         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
3795         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1));
3796         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
3797         matrixAZ.copyFrom(mBLASData.L2_zGERC_A_mn);
3798         vectorXZ.copyFrom(mBLASData.L2_zGERC_x_m1);
3799         vectorYZ.copyFrom(mBLASData.L2_zGERC_y_n1);
3800 
3801         // Test for the default case: NO_TRANS
3802         mBLAS.ZGERC(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
3803         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
3804         matrixARef.copyFrom(mBLASData.L2_zGERC_o_N);
3805         verifyMatrix(matrixARef, matrixAZ);
3806 
3807         // Test for incX = 2 & incY = 3;
3808         incX = 2;
3809         incY = 3;
3810         int dimX = 1 + (mBLASData.dM - 1) * incX;
3811         int dimY = 1 + (mBLASData.dN - 1) * incY;
3812         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
3813         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
3814         vectorXZ.copyFrom(mBLASData.L2_zGERC_x_m2);
3815         vectorYZ.copyFrom(mBLASData.L2_zGERC_y_n2);
3816         matrixAZ.copyFrom(mBLASData.L2_zGERC_A_mn);
3817 
3818         mBLAS.ZGERC(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
3819         verifyMatrix(matrixARef, matrixAZ);
3820 
3821         mRS.finish();
3822         checkError();
3823     }
3824 
3825 
xHER_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix)3826     private void xHER_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) {
3827         for (Allocation matA : mMatrix) {
3828             for (Allocation vecX : mMatrix) {
3829                 if (!validateVecInput(vecX)) {
3830                     continue;
3831                 }
3832                 Element elemA = matA.getType().getElement();
3833                 if (validateSYR(elemA, Uplo, vecX, incX, matA)) {
3834                     try {
3835                         if (elemA.isCompatible(Element.F32_2(mRS))) {
3836                             mBLAS.CHER(Uplo, alphaS, vecX, incX, matA);
3837                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
3838                             mBLAS.ZHER(Uplo, alphaD, vecX, incX, matA);
3839                         }
3840                     } catch (RSRuntimeException e) {
3841                         fail("should NOT throw RSRuntimeException");
3842                     }
3843                 } else {
3844                     try {
3845                         mBLAS.CHER(Uplo, alphaS, vecX, incX, matA);
3846                         fail("should throw RSRuntimeException for CHER");
3847                     } catch (RSRuntimeException e) {
3848                     }
3849                     try {
3850                         mBLAS.ZHER(Uplo, alphaD, vecX, incX, matA);
3851                         fail("should throw RSRuntimeException for ZHER");
3852                     } catch (RSRuntimeException e) {
3853                     }
3854                 }
3855             }
3856         }
3857     }
3858 
L2_xHER_API(ArrayList<Allocation> mMatrix)3859     public void L2_xHER_API(ArrayList<Allocation> mMatrix) {
3860         for (int Uplo : mUplo) {
3861             for (int incX : mInc) {
3862                 xHER_API_test(Uplo, incX, mMatrix);
3863             }
3864         }
3865     }
3866 
test_L2_CHER_API()3867     public void test_L2_CHER_API() {
3868         L2_xHER_API(mMatrixC);
3869     }
3870 
test_L2_ZHER_API()3871     public void test_L2_ZHER_API() {
3872         L2_xHER_API(mMatrixZ);
3873     }
3874 
test_L2_CHER_Correctness()3875     public void test_L2_CHER_Correctness() {
3876         int uplo = ScriptIntrinsicBLAS.UPPER;
3877         int incX = 1;
3878 
3879         // Populate input allocations
3880         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
3881         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
3882         matrixAC.copyFrom(mBLASData.L2_cHER_A_nn);
3883         vectorXC.copyFrom(mBLASData.L2_cHER_x_n1);
3884 
3885         // Test for the default case: NO_TRANS
3886         mBLAS.CHER(uplo, alphaS, vectorXC, incX, matrixAC);
3887         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
3888         matrixARef.copyFrom(mBLASData.L2_cHER_o_N);
3889         verifyMatrix(matrixARef, matrixAC, true);
3890 
3891         // Test for incX = 2;
3892         incX = 2;
3893         int dimX = 1 + (mBLASData.dN - 1) * incX;
3894         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
3895         vectorXC.copyFrom(mBLASData.L2_cHER_x_n2);
3896         matrixAC.copyFrom(mBLASData.L2_cHER_A_nn);
3897 
3898         mBLAS.CHER(uplo, alphaS, vectorXC, incX, matrixAC);
3899         verifyMatrix(matrixARef, matrixAC, true);
3900 
3901         mRS.finish();
3902         checkError();
3903     }
3904 
test_L2_ZHER_Correctness()3905     public void test_L2_ZHER_Correctness() {
3906         int uplo = ScriptIntrinsicBLAS.UPPER;
3907         int incX = 1;
3908 
3909         // Populate input allocations
3910         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
3911         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
3912         matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn);
3913         vectorXZ.copyFrom(mBLASData.L2_zHER_x_n1);
3914 
3915         // Test for the default case: NO_TRANS
3916         mBLAS.ZHER(uplo, alphaD, vectorXZ, incX, matrixAZ);
3917         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
3918         matrixARef.copyFrom(mBLASData.L2_zHER_o_N);
3919         verifyMatrix(matrixARef, matrixAZ, true);
3920 
3921         // Test for incX = 2;
3922         incX = 2;
3923         int dimX = 1 + (mBLASData.dN - 1) * incX;
3924         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
3925         vectorXZ.copyFrom(mBLASData.L2_zHER_x_n2);
3926         matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn);
3927 
3928         mBLAS.ZHER(uplo, alphaD, vectorXZ, incX, matrixAZ);
3929         verifyMatrix(matrixARef, matrixAZ, true);
3930 
3931         mRS.finish();
3932         checkError();
3933     }
3934 
3935 
xHPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix)3936     private void xHPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) {
3937         for (Allocation matA : mMatrix) {
3938             for (Allocation vecX : mMatrix) {
3939                 if (!validateVecInput(vecX)) {
3940                     continue;
3941                 }
3942                 Element elemA = matA.getType().getElement();
3943                 if (validateSPR(elemA, Uplo, vecX, incX, matA)) {
3944                     try {
3945                         if (elemA.isCompatible(Element.F32_2(mRS))) {
3946                             mBLAS.CHPR(Uplo, alphaS, vecX, incX, matA);
3947                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
3948                             mBLAS.ZHPR(Uplo, alphaD, vecX, incX, matA);
3949                         }
3950                     } catch (RSRuntimeException e) {
3951                         fail("should NOT throw RSRuntimeException");
3952                     }
3953                 } else {
3954                     try {
3955                         mBLAS.CHPR(Uplo, alphaS, vecX, incX, matA);
3956                         fail("should throw RSRuntimeException for CHPR");
3957                     } catch (RSRuntimeException e) {
3958                     }
3959                     try {
3960                         mBLAS.ZHPR(Uplo, alphaD, vecX, incX, matA);
3961                         fail("should throw RSRuntimeException for ZHPR");
3962                     } catch (RSRuntimeException e) {
3963                     }
3964                 }
3965             }
3966         }
3967     }
3968 
L2_xHPR_API(ArrayList<Allocation> mMatrix)3969     public void L2_xHPR_API(ArrayList<Allocation> mMatrix) {
3970         for (int Uplo : mUplo) {
3971             for (int incX : mInc) {
3972                 xHPR_API_test(Uplo, incX, mMatrix);
3973             }
3974         }
3975     }
3976 
test_L2_CHPR_API()3977     public void test_L2_CHPR_API() {
3978         L2_xHPR_API(mMatrixC);
3979     }
3980 
test_L2_ZHPR_API()3981     public void test_L2_ZHPR_API() {
3982         L2_xHPR_API(mMatrixZ);
3983     }
3984 
test_L2_CHPR_Correctness()3985     public void test_L2_CHPR_Correctness() {
3986         int uplo = ScriptIntrinsicBLAS.UPPER;
3987         int incX = 1;
3988 
3989         // Populate input allocations
3990         int N = mBLASData.dN;
3991         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
3992         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
3993         matrixAC.copyFrom(mBLASData.L2_cHER_A_nn_pu);
3994         vectorXC.copyFrom(mBLASData.L2_cHER_x_n1);
3995 
3996         // Test for the default case: NO_TRANS
3997         mBLAS.CHPR(uplo, alphaS, vectorXC, incX, matrixAC);
3998         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
3999         matrixARef.copyFrom(mBLASData.L2_cHER_o_N_pu);
4000         verifyMatrix(matrixARef, matrixAC, true);
4001 
4002         // Test for incX = 2;
4003         incX = 2;
4004         int dimX = 1 + (N - 1) * incX;
4005         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
4006         vectorXC.copyFrom(mBLASData.L2_cHER_x_n2);
4007         matrixAC.copyFrom(mBLASData.L2_cHER_A_nn_pu);
4008 
4009         mBLAS.CHPR(uplo, alphaS, vectorXC, incX, matrixAC);
4010         verifyMatrix(matrixARef, matrixAC, true);
4011 
4012         mRS.finish();
4013         checkError();
4014     }
4015 
test_L2_ZHPR_Correctness()4016     public void test_L2_ZHPR_Correctness() {
4017         int uplo = ScriptIntrinsicBLAS.UPPER;
4018         int incX = 1;
4019 
4020         // Populate input allocations
4021         int N = mBLASData.dN;
4022         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
4023         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
4024         matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn_pu);
4025         vectorXZ.copyFrom(mBLASData.L2_zHER_x_n1);
4026 
4027         // Test for the default case: NO_TRANS
4028         mBLAS.ZHPR(uplo, alphaD, vectorXZ, incX, matrixAZ);
4029         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
4030         matrixARef.copyFrom(mBLASData.L2_zHER_o_N_pu);
4031         verifyMatrix(matrixARef, matrixAZ, true);
4032 
4033         // Test for incX = 2;
4034         incX = 2;
4035         int dimX = 1 + (N - 1) * incX;
4036         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
4037         vectorXZ.copyFrom(mBLASData.L2_zHER_x_n2);
4038         matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn_pu);
4039 
4040         mBLAS.ZHPR(uplo, alphaD, vectorXZ, incX, matrixAZ);
4041         verifyMatrix(matrixARef, matrixAZ, true);
4042 
4043         mRS.finish();
4044         checkError();
4045     }
4046 
4047 
xHER2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)4048     private void xHER2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
4049         for (Allocation matA : mMatrix) {
4050             for (Allocation vecX : mMatrix) {
4051                 if (!validateVecInput(vecX)) {
4052                     continue;
4053                 }
4054                 for (Allocation vecY : mMatrix) {
4055                     if (!validateVecInput(vecY)) {
4056                         continue;
4057                     }
4058                     Element elemA = matA.getType().getElement();
4059                     if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
4060                         try {
4061                             if (elemA.isCompatible(Element.F32_2(mRS))) {
4062                                 mBLAS.CHER2(Uplo, alphaC, vecX, incX, vecY, incY, matA);
4063                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
4064                                 mBLAS.ZHER2(Uplo, alphaZ, vecX, incX, vecY, incY, matA);
4065                             }
4066                         } catch (RSRuntimeException e) {
4067                             fail("should NOT throw RSRuntimeException");
4068                         }
4069                     } else {
4070                         try {
4071                             mBLAS.CHER2(Uplo, alphaC, vecX, incX, vecY, incY, matA);
4072                             fail("should throw RSRuntimeException for CHER2");
4073                         } catch (RSRuntimeException e) {
4074                         }
4075                         try {
4076                             mBLAS.ZHER2(Uplo, alphaZ, vecX, incX, vecY, incY, matA);
4077                             fail("should throw RSRuntimeException for ZHER2");
4078                         } catch (RSRuntimeException e) {
4079                         }
4080                     }
4081                 }
4082             }
4083         }
4084     }
4085 
L2_xHER2_API(ArrayList<Allocation> mMatrix)4086     public void L2_xHER2_API(ArrayList<Allocation> mMatrix) {
4087         for (int Uplo : mUplo) {
4088             for (int incX : mInc) {
4089                 xHER2_API_test(Uplo, incX, incX, mMatrix);
4090             }
4091         }
4092     }
4093 
test_L2_CHER2_API()4094     public void test_L2_CHER2_API() {
4095         L2_xHER2_API(mMatrixC);
4096     }
4097 
test_L2_ZHER2_API()4098     public void test_L2_ZHER2_API() {
4099         L2_xHER2_API(mMatrixZ);
4100     }
4101 
test_L2_CHER2_Correctness()4102     public void test_L2_CHER2_Correctness() {
4103         int uplo = ScriptIntrinsicBLAS.UPPER;
4104         int incX = 1;
4105         int incY = 1;
4106 
4107         // Populate input allocations
4108         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
4109         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
4110         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
4111         matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn);
4112         vectorXC.copyFrom(mBLASData.L2_cHER2_x_n1);
4113         vectorYC.copyFrom(mBLASData.L2_cHER2_y_n1);
4114 
4115         // Test for the default case: NO_TRANS
4116         mBLAS.CHER2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
4117         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
4118         matrixARef.copyFrom(mBLASData.L2_cHER2_o_N);
4119         verifyMatrix(matrixARef, matrixAC, true);
4120 
4121         // Test for incX = 2 & incY = 3;
4122         incX = 2;
4123         incY = 3;
4124         int dimX = 1 + (mBLASData.dN - 1) * incX;
4125         int dimY = 1 + (mBLASData.dN - 1) * incY;
4126         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
4127         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
4128         vectorXC.copyFrom(mBLASData.L2_cHER2_x_n2);
4129         vectorYC.copyFrom(mBLASData.L2_cHER2_y_n2);
4130         matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn);
4131 
4132         mBLAS.CHER2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
4133         verifyMatrix(matrixARef, matrixAC, true);
4134 
4135         mRS.finish();
4136         checkError();
4137     }
4138 
test_L2_ZHER2_Correctness()4139     public void test_L2_ZHER2_Correctness() {
4140         int uplo = ScriptIntrinsicBLAS.UPPER;
4141         int incX = 1;
4142         int incY = 1;
4143 
4144         // Populate input allocations
4145         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
4146         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
4147         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
4148         matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn);
4149         vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n1);
4150         vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n1);
4151 
4152         // Test for the default case: NO_TRANS
4153         mBLAS.ZHER2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
4154         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
4155         matrixARef.copyFrom(mBLASData.L2_zHER2_o_N);
4156         verifyMatrix(matrixARef, matrixAZ, true);
4157 
4158         // Test for incX = 2 & incY = 3;
4159         incX = 2;
4160         incY = 3;
4161         int dimX = 1 + (mBLASData.dN - 1) * incX;
4162         int dimY = 1 + (mBLASData.dN - 1) * incY;
4163         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
4164         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
4165         vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n2);
4166         vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n2);
4167         matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn);
4168 
4169         mBLAS.ZHER2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
4170         verifyMatrix(matrixARef, matrixAZ, true);
4171 
4172         mRS.finish();
4173         checkError();
4174     }
4175 
4176 
4177 
xHPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)4178     private void xHPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
4179         for (Allocation matA : mMatrix) {
4180             for (Allocation vecX : mMatrix) {
4181                 if (!validateVecInput(vecX)) {
4182                     continue;
4183                 }
4184                 for (Allocation vecY : mMatrix) {
4185                     if (!validateVecInput(vecY)) {
4186                         continue;
4187                     }
4188                     Element elemA = matA.getType().getElement();
4189                     if (validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
4190                         try {
4191                             if (elemA.isCompatible(Element.F32_2(mRS))) {
4192                                 mBLAS.CHPR2(Uplo, alphaC, vecX, incX, vecY, incY, matA);
4193                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
4194                                 mBLAS.ZHPR2(Uplo, alphaZ, vecX, incX, vecY, incY, matA);
4195                             }
4196                         } catch (RSRuntimeException e) {
4197                             fail("should NOT throw RSRuntimeException");
4198                         }
4199                     } else {
4200                         try {
4201                             mBLAS.CHPR2(Uplo, alphaC, vecX, incX, vecY, incY, matA);
4202                             fail("should throw RSRuntimeException for CHPR2");
4203                         } catch (RSRuntimeException e) {
4204                         }
4205                         try {
4206                             mBLAS.ZHPR2(Uplo, alphaZ, vecX, incX, vecY, incY, matA);
4207                             fail("should throw RSRuntimeException for ZHPR2");
4208                         } catch (RSRuntimeException e) {
4209                         }
4210                     }
4211                 }
4212             }
4213         }
4214     }
4215 
L2_xHPR2_API(ArrayList<Allocation> mMatrix)4216     public void L2_xHPR2_API(ArrayList<Allocation> mMatrix) {
4217         for (int Uplo : mUplo) {
4218             for (int incX : mInc) {
4219                 xHPR2_API_test(Uplo, incX, incX, mMatrix);
4220             }
4221         }
4222     }
4223 
test_L2_CHPR2_API()4224     public void test_L2_CHPR2_API() {
4225         L2_xHPR2_API(mMatrixC);
4226     }
4227 
test_L2_ZHPR2_API()4228     public void test_L2_ZHPR2_API() {
4229         L2_xHPR2_API(mMatrixZ);
4230     }
4231 
test_L2_CHPR2_Correctness()4232     public void test_L2_CHPR2_Correctness() {
4233         int uplo = ScriptIntrinsicBLAS.UPPER;
4234         int incX = 1;
4235         int incY = 1;
4236 
4237         // Populate input allocations
4238         int N = mBLASData.dN;
4239         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
4240         Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
4241         Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
4242         matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn_pu);
4243         vectorXC.copyFrom(mBLASData.L2_cHER2_x_n1);
4244         vectorYC.copyFrom(mBLASData.L2_cHER2_y_n1);
4245 
4246         // Test for the default case: NO_TRANS
4247         mBLAS.CHPR2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
4248         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
4249         matrixARef.copyFrom(mBLASData.L2_cHER2_o_N_pu);
4250         verifyMatrix(matrixARef, matrixAC, true);
4251 
4252         // Test for incX = 2 & incY = 3;
4253         incX = 2;
4254         incY = 3;
4255         int dimX = 1 + (N - 1) * incX;
4256         int dimY = 1 + (N - 1) * incY;
4257         vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
4258         vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
4259         vectorXC.copyFrom(mBLASData.L2_cHER2_x_n2);
4260         vectorYC.copyFrom(mBLASData.L2_cHER2_y_n2);
4261         matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn_pu);
4262 
4263         mBLAS.CHPR2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC);
4264         verifyMatrix(matrixARef, matrixAC, true);
4265 
4266         mRS.finish();
4267         checkError();
4268     }
4269 
test_L2_ZHPR2_Correctness()4270     public void test_L2_ZHPR2_Correctness() {
4271         int uplo = ScriptIntrinsicBLAS.UPPER;
4272         int incX = 1;
4273         int incY = 1;
4274 
4275         // Populate input allocations
4276         int N = mBLASData.dN;
4277         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
4278         Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
4279         Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
4280         matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn_pu);
4281         vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n1);
4282         vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n1);
4283 
4284         // Test for the default case: NO_TRANS
4285         mBLAS.ZHPR2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
4286         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
4287         matrixARef.copyFrom(mBLASData.L2_zHER2_o_N_pu);
4288         verifyMatrix(matrixARef, matrixAZ, true);
4289 
4290         // Test for incX = 2 & incY = 3;
4291         incX = 2;
4292         incY = 3;
4293         int dimX = 1 + (N - 1) * incX;
4294         int dimY = 1 + (N - 1) * incY;
4295         vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
4296         vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
4297         vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n2);
4298         vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n2);
4299         matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn_pu);
4300 
4301         mBLAS.ZHPR2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ);
4302         verifyMatrix(matrixARef, matrixAZ, true);
4303 
4304         mRS.finish();
4305         checkError();
4306     }
4307 
4308 
4309 
validateSYR(Element e, int Uplo, Allocation X, int incX, Allocation A)4310     private boolean validateSYR(Element e, int Uplo, Allocation X, int incX, Allocation A) {
4311         if (!validateUplo(Uplo)) {
4312             return false;
4313         }
4314         if (!A.getType().getElement().isCompatible(e) ||
4315             !X.getType().getElement().isCompatible(e)) {
4316             return false;
4317         }
4318 
4319         int N = A.getType().getX();
4320 
4321         if (X.getType().getY() > 1) {
4322             return false;
4323         }
4324         if (N != A.getType().getY()) {
4325             return false;
4326         }
4327         if (incX <= 0) {
4328             return false;
4329         }
4330         int expectedXDim = 1 + (N - 1) * incX;
4331         if (X.getType().getX() != expectedXDim) {
4332             return false;
4333         }
4334         return true;
4335     }
4336 
xSYR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix)4337     private void xSYR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) {
4338         for (Allocation matA : mMatrix) {
4339             for (Allocation vecX : mMatrix) {
4340                 if (!validateVecInput(vecX)) {
4341                     continue;
4342                 }
4343                 Element elemA = matA.getType().getElement();
4344                 if (validateSYR(elemA, Uplo, vecX, incX, matA)) {
4345                     try {
4346                         if (elemA.isCompatible(Element.F32(mRS))) {
4347                             mBLAS.SSYR(Uplo, alphaS, vecX, incX, matA);
4348                         } else if (elemA.isCompatible(Element.F64(mRS))) {
4349                             mBLAS.DSYR(Uplo, alphaD, vecX, incX, matA);
4350                         }
4351                     } catch (RSRuntimeException e) {
4352                         fail("should NOT throw RSRuntimeException");
4353                     }
4354                 } else {
4355                     try {
4356                         mBLAS.SSYR(Uplo, alphaS, vecX, incX, matA);
4357                         fail("should throw RSRuntimeException for SSYR");
4358                     } catch (RSRuntimeException e) {
4359                     }
4360                     try {
4361                         mBLAS.DSYR(Uplo, alphaD, vecX, incX, matA);
4362                         fail("should throw RSRuntimeException for DSYR");
4363                     } catch (RSRuntimeException e) {
4364                     }
4365                 }
4366             }
4367         }
4368     }
4369 
L2_xSYR_API(ArrayList<Allocation> mMatrix)4370     public void L2_xSYR_API(ArrayList<Allocation> mMatrix) {
4371         for (int Uplo : mUplo) {
4372             for (int incX : mInc) {
4373                 xSYR_API_test(Uplo, incX, mMatrix);
4374             }
4375         }
4376     }
4377 
test_L2_SSYR_API()4378     public void test_L2_SSYR_API() {
4379         L2_xSYR_API(mMatrixS);
4380     }
4381 
test_L2_DSYR_API()4382     public void test_L2_DSYR_API() {
4383         L2_xSYR_API(mMatrixD);
4384     }
4385 
test_L2_SSYR_Correctness()4386     public void test_L2_SSYR_Correctness() {
4387         int uplo = ScriptIntrinsicBLAS.UPPER;
4388         int incX = 1;
4389 
4390         // Populate input allocations
4391         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
4392         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
4393         matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn);
4394         vectorXS.copyFrom(mBLASData.L2_sSYR_x_n1);
4395 
4396         // Test for the default case: NO_TRANS
4397         mBLAS.SSYR(uplo, alphaS, vectorXS, incX, matrixAS);
4398         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
4399         matrixARef.copyFrom(mBLASData.L2_sSYR_o_N);
4400         verifyMatrix(matrixARef, matrixAS, true);
4401 
4402         // Test for incX = 2;
4403         incX = 2;
4404         int dimX = 1 + (mBLASData.dN - 1) * incX;
4405         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
4406         vectorXS.copyFrom(mBLASData.L2_sSYR_x_n2);
4407         matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn);
4408 
4409         mBLAS.SSYR(uplo, alphaS, vectorXS, incX, matrixAS);
4410         verifyMatrix(matrixARef, matrixAS, true);
4411 
4412         mRS.finish();
4413         checkError();
4414     }
4415 
test_L2_DSYR_Correctness()4416     public void test_L2_DSYR_Correctness() {
4417         int uplo = ScriptIntrinsicBLAS.UPPER;
4418         int incX = 1;
4419 
4420         // Populate input allocations
4421         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
4422         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
4423         matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn);
4424         vectorXD.copyFrom(mBLASData.L2_dSYR_x_n1);
4425 
4426         // Test for the default case: NO_TRANS
4427         mBLAS.DSYR(uplo, alphaD, vectorXD, incX, matrixAD);
4428         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
4429         matrixARef.copyFrom(mBLASData.L2_dSYR_o_N);
4430         verifyMatrix(matrixARef, matrixAD, true);
4431 
4432         // Test for incX = 2;
4433         incX = 2;
4434         int dimX = 1 + (mBLASData.dN - 1) * incX;
4435         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
4436         vectorXD.copyFrom(mBLASData.L2_dSYR_x_n2);
4437         matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn);
4438 
4439         mBLAS.DSYR(uplo, alphaD, vectorXD, incX, matrixAD);
4440         verifyMatrix(matrixARef, matrixAD, true);
4441 
4442         mRS.finish();
4443         checkError();
4444     }
4445 
4446 
validateSPR(Element e, int Uplo, Allocation X, int incX, Allocation Ap)4447     private boolean validateSPR(Element e, int Uplo, Allocation X, int incX, Allocation Ap) {
4448         if (!validateUplo(Uplo)) {
4449             return false;
4450         }
4451         if (!Ap.getType().getElement().isCompatible(e) ||
4452             !X.getType().getElement().isCompatible(e)) {
4453             return false;
4454         }
4455         if (X.getType().getY() > 1) {
4456             return false;
4457         }
4458 
4459         if (Ap.getType().getY() > 1) {
4460             return false;
4461         }
4462 
4463         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
4464         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
4465             return false;
4466         }
4467         if (incX <= 0) {
4468             return false;
4469         }
4470         int expectedXDim = 1 + (N - 1) * incX;
4471         if (X.getType().getX() != expectedXDim) {
4472             return false;
4473         }
4474 
4475         return true;
4476     }
4477 
xSPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix)4478     private void xSPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) {
4479         for (Allocation matA : mMatrix) {
4480             for (Allocation vecX : mMatrix) {
4481                 if (!validateVecInput(vecX)) {
4482                     continue;
4483                 }
4484                 Element elemA = matA.getType().getElement();
4485                 if (validateSPR(elemA, Uplo, vecX, incX, matA)) {
4486                     try {
4487                         if (elemA.isCompatible(Element.F32(mRS))) {
4488                             mBLAS.SSPR(Uplo, alphaS, vecX, incX, matA);
4489                         } else if (elemA.isCompatible(Element.F64(mRS))) {
4490                             mBLAS.DSPR(Uplo, alphaD, vecX, incX, matA);
4491                         }
4492                     } catch (RSRuntimeException e) {
4493                         fail("should NOT throw RSRuntimeException");
4494                     }
4495                 } else {
4496                     try {
4497                         mBLAS.SSPR(Uplo, alphaS, vecX, incX, matA);
4498                         fail("should throw RSRuntimeException for SSPR");
4499                     } catch (RSRuntimeException e) {
4500                     }
4501                     try {
4502                         mBLAS.DSPR(Uplo, alphaD, vecX, incX, matA);
4503                         fail("should throw RSRuntimeException for DSPR");
4504                     } catch (RSRuntimeException e) {
4505                     }
4506                 }
4507             }
4508         }
4509     }
4510 
L2_xSPR_API(ArrayList<Allocation> mMatrix)4511     public void L2_xSPR_API(ArrayList<Allocation> mMatrix) {
4512         for (int Uplo : mUplo) {
4513             for (int incX : mInc) {
4514                 xSPR_API_test(Uplo, incX, mMatrix);
4515             }
4516         }
4517     }
4518 
test_L2_SSPR_API()4519     public void test_L2_SSPR_API() {
4520         L2_xSPR_API(mMatrixS);
4521     }
4522 
test_L2_DSPR_API()4523     public void test_L2_DSPR_API() {
4524         L2_xSPR_API(mMatrixD);
4525     }
4526 
test_L2_SSPR_Correctness()4527     public void test_L2_SSPR_Correctness() {
4528         int uplo = ScriptIntrinsicBLAS.UPPER;
4529         int incX = 1;
4530 
4531         // Populate input allocations
4532         int N = mBLASData.dN;
4533         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
4534         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
4535         matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn_pu);
4536         vectorXS.copyFrom(mBLASData.L2_sSYR_x_n1);
4537 
4538         // Test for the default case: NO_TRANS
4539         mBLAS.SSPR(uplo, alphaS, vectorXS, incX, matrixAS);
4540         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
4541         matrixARef.copyFrom(mBLASData.L2_sSYR_o_N_pu);
4542         verifyMatrix(matrixARef, matrixAS, true);
4543 
4544         // Test for incX = 2;
4545         incX = 2;
4546         int dimX = 1 + (N - 1) * incX;
4547         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
4548         vectorXS.copyFrom(mBLASData.L2_sSYR_x_n2);
4549         matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn_pu);
4550 
4551         mBLAS.SSPR(uplo, alphaS, vectorXS, incX, matrixAS);
4552         verifyMatrix(matrixARef, matrixAS, true);
4553 
4554         mRS.finish();
4555         checkError();
4556     }
4557 
test_L2_DSPR_Correctness()4558     public void test_L2_DSPR_Correctness() {
4559         int uplo = ScriptIntrinsicBLAS.UPPER;
4560         int incX = 1;
4561 
4562         // Populate input allocations
4563         int N = mBLASData.dN;
4564         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
4565         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
4566         matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn_pu);
4567         vectorXD.copyFrom(mBLASData.L2_dSYR_x_n1);
4568 
4569         // Test for the default case: NO_TRANS
4570         mBLAS.DSPR(uplo, alphaD, vectorXD, incX, matrixAD);
4571         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
4572         matrixARef.copyFrom(mBLASData.L2_dSYR_o_N_pu);
4573         verifyMatrix(matrixARef, matrixAD, true);
4574 
4575         // Test for incX = 2;
4576         incX = 2;
4577         int dimX = 1 + (N - 1) * incX;
4578         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
4579         vectorXD.copyFrom(mBLASData.L2_dSYR_x_n2);
4580         matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn_pu);
4581 
4582         mBLAS.DSPR(uplo, alphaD, vectorXD, incX, matrixAD);
4583         verifyMatrix(matrixARef, matrixAD, true);
4584 
4585         mRS.finish();
4586         checkError();
4587     }
4588 
4589 
validateSYR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A)4590     private boolean validateSYR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
4591         if (!validateUplo(Uplo)) {
4592             return false;
4593         }
4594         if (!A.getType().getElement().isCompatible(e) ||
4595             !X.getType().getElement().isCompatible(e) ||
4596             !Y.getType().getElement().isCompatible(e)) {
4597             return false;
4598         }
4599 
4600         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
4601             return false;
4602         }
4603 
4604         int N = A.getType().getX();
4605 
4606         if (N != A.getType().getY()) {
4607             return false;
4608         }
4609         if (incX <= 0 || incY <= 0) {
4610             return false;
4611         }
4612         int expectedXDim = 1 + (N - 1) * incX;
4613         int expectedYDim = 1 + (N - 1) * incY;
4614         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
4615             return false;
4616         }
4617         return true;
4618     }
4619 
xSYR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)4620     private void xSYR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
4621         for (Allocation matA : mMatrix) {
4622             for (Allocation vecX : mMatrix) {
4623                 if (!validateVecInput(vecX)) {
4624                     continue;
4625                 }
4626                 for (Allocation vecY : mMatrix) {
4627                     if (!validateVecInput(vecY)) {
4628                         continue;
4629                     }
4630                     Element elemA = matA.getType().getElement();
4631                     if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
4632                         try {
4633                             if (elemA.isCompatible(Element.F32(mRS))) {
4634                                 mBLAS.SSYR2(Uplo, alphaS, vecX, incX, vecY, incY, matA);
4635                             } else if (elemA.isCompatible(Element.F64(mRS))) {
4636                                 mBLAS.DSYR2(Uplo, alphaD, vecX, incX, vecY, incY, matA);
4637                             }
4638                         } catch (RSRuntimeException e) {
4639                             fail("should NOT throw RSRuntimeException");
4640                         }
4641                     } else {
4642                         try {
4643                             mBLAS.SSYR2(Uplo, alphaS, vecX, incX, vecY, incY, matA);
4644                             fail("should throw RSRuntimeException for SSYR2");
4645                         } catch (RSRuntimeException e) {
4646                         }
4647                         try {
4648                             mBLAS.DSYR2(Uplo, alphaD, vecX, incX, vecY, incY, matA);
4649                             fail("should throw RSRuntimeException for DSYR2");
4650                         } catch (RSRuntimeException e) {
4651                         }
4652                     }
4653                 }
4654             }
4655         }
4656     }
4657 
L2_xSYR2_API(ArrayList<Allocation> mMatrix)4658     public void L2_xSYR2_API(ArrayList<Allocation> mMatrix) {
4659         for (int Uplo : mUplo) {
4660             for (int incX : mInc) {
4661                 xSYR2_API_test(Uplo, incX, incX, mMatrix);
4662             }
4663         }
4664     }
4665 
test_L2_SSYR2_API()4666     public void test_L2_SSYR2_API() {
4667         L2_xSYR2_API(mMatrixS);
4668     }
4669 
test_L2_DSYR2_API()4670     public void test_L2_DSYR2_API() {
4671         L2_xSYR2_API(mMatrixD);
4672     }
4673 
test_L2_SSYR2_Correctness()4674     public void test_L2_SSYR2_Correctness() {
4675         int uplo = ScriptIntrinsicBLAS.UPPER;
4676         int incX = 1;
4677         int incY = 1;
4678 
4679         // Populate input allocations
4680         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
4681         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
4682         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
4683         matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn);
4684         vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n1);
4685         vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n1);
4686 
4687         // Test for the default case: NO_TRANS
4688         mBLAS.SSYR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
4689         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
4690         matrixARef.copyFrom(mBLASData.L2_sSYR2_o_N);
4691         verifyMatrix(matrixARef, matrixAS, true);
4692 
4693         // Test for incX = 2 & incY = 3;
4694         incX = 2;
4695         incY = 3;
4696         int dimX = 1 + (mBLASData.dN - 1) * incX;
4697         int dimY = 1 + (mBLASData.dN - 1) * incY;
4698         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
4699         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
4700         vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n2);
4701         vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n2);
4702         matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn);
4703 
4704         mBLAS.SSYR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
4705         verifyMatrix(matrixARef, matrixAS, true);
4706 
4707         mRS.finish();
4708         checkError();
4709     }
4710 
test_L2_DSYR2_Correctness()4711     public void test_L2_DSYR2_Correctness() {
4712         int uplo = ScriptIntrinsicBLAS.UPPER;
4713         int incX = 1;
4714         int incY = 1;
4715 
4716         // Populate input allocations
4717         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
4718         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
4719         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
4720         matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn);
4721         vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n1);
4722         vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n1);
4723 
4724         // Test for the default case: NO_TRANS
4725         mBLAS.DSYR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
4726         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
4727         matrixARef.copyFrom(mBLASData.L2_dSYR2_o_N);
4728         verifyMatrix(matrixARef, matrixAD, true);
4729 
4730         // Test for incX = 2 & incY = 3;
4731         incX = 2;
4732         incY = 3;
4733         int dimX = 1 + (mBLASData.dN - 1) * incX;
4734         int dimY = 1 + (mBLASData.dN - 1) * incY;
4735         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
4736         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
4737         vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n2);
4738         vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n2);
4739         matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn);
4740 
4741         mBLAS.DSYR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
4742         verifyMatrix(matrixARef, matrixAD, true);
4743 
4744         mRS.finish();
4745         checkError();
4746     }
4747 
4748 
validateSPR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)4749     private boolean validateSPR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
4750         if (!validateUplo(Uplo)) {
4751             return false;
4752         }
4753         if (!Ap.getType().getElement().isCompatible(e) ||
4754             !X.getType().getElement().isCompatible(e) ||
4755             !Y.getType().getElement().isCompatible(e)) {
4756             return false;
4757         }
4758         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
4759             return false;
4760         }
4761 
4762         if (Ap.getType().getY() > 1) {
4763             return false;
4764         }
4765 
4766         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
4767         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
4768             return false;
4769         }
4770         if (incX <= 0 || incY <= 0) {
4771             return false;
4772         }
4773         int expectedXDim = 1 + (N - 1) * incX;
4774         int expectedYDim = 1 + (N - 1) * incY;
4775         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
4776             return false;
4777         }
4778 
4779         return true;
4780     }
4781 
xSPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)4782     private void xSPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
4783         for (Allocation matA : mMatrix) {
4784             for (Allocation vecX : mMatrix) {
4785                 if (!validateVecInput(vecX)) {
4786                     continue;
4787                 }
4788                 for (Allocation vecY : mMatrix) {
4789                     if (!validateVecInput(vecY)) {
4790                         continue;
4791                     }
4792                     Element elemA = matA.getType().getElement();
4793                     if (validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
4794                         try {
4795                             if (elemA.isCompatible(Element.F32(mRS))) {
4796                                 mBLAS.SSPR2(Uplo, alphaS, vecX, incX, vecY, incY, matA);
4797                             } else if (elemA.isCompatible(Element.F64(mRS))) {
4798                                 mBLAS.DSPR2(Uplo, alphaD, vecX, incX, vecY, incY, matA);
4799                             }
4800                         } catch (RSRuntimeException e) {
4801                             fail("should NOT throw RSRuntimeException");
4802                         }
4803                     } else {
4804                         try {
4805                             mBLAS.SSPR2(Uplo, alphaS, vecX, incX, vecY, incY, matA);
4806                             fail("should throw RSRuntimeException for SSPR2");
4807                         } catch (RSRuntimeException e) {
4808                         }
4809                         try {
4810                             mBLAS.DSPR2(Uplo, alphaD, vecX, incX, vecY, incY, matA);
4811                             fail("should throw RSRuntimeException for DSPR2");
4812                         } catch (RSRuntimeException e) {
4813                         }
4814                     }
4815                 }
4816             }
4817         }
4818     }
4819 
L2_xSPR2_API(ArrayList<Allocation> mMatrix)4820     public void L2_xSPR2_API(ArrayList<Allocation> mMatrix) {
4821         for (int Uplo : mUplo) {
4822             for (int incX : mInc) {
4823                 xSPR2_API_test(Uplo, incX, incX, mMatrix);
4824             }
4825         }
4826     }
4827 
test_L2_SSPR2_API()4828     public void test_L2_SSPR2_API() {
4829         L2_xSPR2_API(mMatrixS);
4830     }
4831 
test_L2_DSPR2_API()4832     public void test_L2_DSPR2_API() {
4833         L2_xSPR2_API(mMatrixD);
4834     }
4835 
test_L2_SSPR2_Correctness()4836     public void test_L2_SSPR2_Correctness() {
4837         int uplo = ScriptIntrinsicBLAS.UPPER;
4838         int incX = 1;
4839         int incY = 1;
4840 
4841         // Populate input allocations
4842         int N = mBLASData.dN;
4843         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
4844         Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
4845         Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
4846         matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn_pu);
4847         vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n1);
4848         vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n1);
4849 
4850         // Test for the default case: NO_TRANS
4851         mBLAS.SSPR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
4852         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
4853         matrixARef.copyFrom(mBLASData.L2_sSYR2_o_N_pu);
4854         verifyMatrix(matrixARef, matrixAS, true);
4855 
4856         // Test for incX = 2 & incY = 3;
4857         incX = 2;
4858         incY = 3;
4859         int dimX = 1 + (N - 1) * incX;
4860         int dimY = 1 + (N - 1) * incY;
4861         vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
4862         vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
4863         vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n2);
4864         vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n2);
4865         matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn_pu);
4866 
4867         mBLAS.SSPR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS);
4868         verifyMatrix(matrixARef, matrixAS, true);
4869 
4870         mRS.finish();
4871         checkError();
4872     }
4873 
test_L2_DSPR2_Correctness()4874     public void test_L2_DSPR2_Correctness() {
4875         int uplo = ScriptIntrinsicBLAS.UPPER;
4876         int incX = 1;
4877         int incY = 1;
4878 
4879         // Populate input allocations
4880         int N = mBLASData.dN;
4881         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
4882         Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
4883         Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1));
4884         matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn_pu);
4885         vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n1);
4886         vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n1);
4887 
4888         // Test for the default case: NO_TRANS
4889         mBLAS.DSPR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
4890         Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1));
4891         matrixARef.copyFrom(mBLASData.L2_dSYR2_o_N_pu);
4892         verifyMatrix(matrixARef, matrixAD, true);
4893 
4894         // Test for incX = 2 & incY = 3;
4895         incX = 2;
4896         incY = 3;
4897         int dimX = 1 + (N - 1) * incX;
4898         int dimY = 1 + (N - 1) * incY;
4899         vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
4900         vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
4901         vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n2);
4902         vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n2);
4903         matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn_pu);
4904 
4905         mBLAS.DSPR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD);
4906         verifyMatrix(matrixARef, matrixAD, true);
4907 
4908         mRS.finish();
4909         checkError();
4910     }
4911 
4912 
4913 
validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C)4914     private boolean validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
4915         int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;
4916         if ((A != null && !A.getType().getElement().isCompatible(e)) ||
4917             (B != null && !B.getType().getElement().isCompatible(e)) ||
4918             (C != null && !C.getType().getElement().isCompatible(e))) {
4919             return false;
4920         }
4921         if (C == null) {
4922             //since matrix C is used to store the result, it cannot be null.
4923             return false;
4924         }
4925         cM = C.getType().getY();
4926         cN = C.getType().getX();
4927 
4928         if (Side == ScriptIntrinsicBLAS.RIGHT) {
4929             if ((A == null && B != null) || (A != null && B == null)) {
4930                 return false;
4931             }
4932             if (B != null) {
4933                 bM = A.getType().getY();
4934                 bN = A.getType().getX();
4935             }
4936             if (A != null) {
4937                 aM = B.getType().getY();
4938                 aN = B.getType().getX();
4939             }
4940         } else {
4941             if (A != null) {
4942                 if (TransA == ScriptIntrinsicBLAS.TRANSPOSE ||
4943                     TransA == ScriptIntrinsicBLAS.CONJ_TRANSPOSE ) {
4944                     aN = A.getType().getY();
4945                     aM = A.getType().getX();
4946                 } else {
4947                     aM = A.getType().getY();
4948                     aN = A.getType().getX();
4949                 }
4950             }
4951             if (B != null) {
4952                 if (TransB == ScriptIntrinsicBLAS.TRANSPOSE ||
4953                     TransB == ScriptIntrinsicBLAS.CONJ_TRANSPOSE ) {
4954                     bN = B.getType().getY();
4955                     bM = B.getType().getX();
4956                 } else {
4957                     bM = B.getType().getY();
4958                     bN = B.getType().getX();
4959                 }
4960             }
4961         }
4962         if (A != null && B != null && C != null) {
4963             if (aN != bM || aM != cM || bN != cN) {
4964                 return false;
4965             }
4966         } else if (A != null && C != null) {
4967             // A and C only, for SYRK
4968             if (cM != cN) {
4969                 return false;
4970             }
4971             if (aM != cM) {
4972                 return false;
4973             }
4974         } else if (A != null && B != null) {
4975             // A and B only
4976             if (aN != bM) {
4977                 return false;
4978             }
4979         }
4980 
4981         return true;
4982     }
4983 
validateL3_xGEMM(Element e, int TransA, int TransB, Allocation A, Allocation B, Allocation C)4984     private boolean validateL3_xGEMM(Element e, int TransA, int TransB, Allocation A, Allocation B, Allocation C) {
4985         boolean result = true;
4986         result &= validateTranspose(TransA);
4987         result &= validateTranspose(TransB);
4988         result &= validateL3(e, TransA, TransB, 0, A, B, C);
4989 
4990         return result;
4991     }
4992 
xGEMM_API_test(int transA, int transB, ArrayList<Allocation> mMatrix)4993     private void xGEMM_API_test(int transA, int transB, ArrayList<Allocation> mMatrix) {
4994         for (Allocation matA : mMatrix) {
4995             for (Allocation matB : mMatrix) {
4996                 for (Allocation matC : mMatrix) {
4997                     Element elemA = matA.getType().getElement();
4998                     if (validateL3_xGEMM(elemA, transA, transB, matA, matB, matC)) {
4999                         try {
5000                             if (elemA.isCompatible(Element.F32(mRS))) {
5001                                 mBLAS.SGEMM(transA, transB, alphaS, matA, matB, betaS, matC);
5002                             } else if (elemA.isCompatible(Element.F64(mRS))) {
5003                                 mBLAS.DGEMM(transA, transB, alphaD, matA, matB, betaD, matC);
5004                             } else if (elemA.isCompatible(Element.F32_2(mRS))) {
5005                                 mBLAS.CGEMM(transA, transB, alphaC, matA, matB, betaC, matC);
5006                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
5007                                 mBLAS.ZGEMM(transA, transB, alphaZ, matA, matB, betaZ, matC);
5008                             }
5009                         } catch (RSRuntimeException e) {
5010                             fail("should NOT throw RSRuntimeException");
5011                         }
5012                     } else {
5013                         try {
5014                             mBLAS.SGEMM(transA, transB, alphaS, matA, matB, betaS, matC);
5015                             fail("should throw RSRuntimeException for SGEMM");
5016                         } catch (RSRuntimeException e) {
5017                         }
5018                         try {
5019                             mBLAS.DGEMM(transA, transB, alphaD, matA, matB, betaD, matC);
5020                             fail("should throw RSRuntimeException for DGEMM");
5021                         } catch (RSRuntimeException e) {
5022                         }
5023                         try {
5024                             mBLAS.CGEMM(transA, transB, alphaC, matA, matB, betaC, matC);
5025                             fail("should throw RSRuntimeException for CGEMM");
5026                         } catch (RSRuntimeException e) {
5027                         }
5028                         try {
5029                             mBLAS.ZGEMM(transA, transB, alphaZ, matA, matB, betaZ, matC);
5030                             fail("should throw RSRuntimeException for ZGEMM");
5031                         } catch (RSRuntimeException e) {
5032                         }
5033                     }
5034                 }
5035             }
5036         }
5037     }
5038 
L3_xGEMM_API(ArrayList<Allocation> mMatrix)5039     private void L3_xGEMM_API(ArrayList<Allocation> mMatrix) {
5040         for (int transA : mTranspose) {
5041             for (int transB : mTranspose) {
5042                 xGEMM_API_test(transA, transB, mMatrix);
5043             }
5044         }
5045     }
5046 
test_L3_SGEMM_API()5047     public void test_L3_SGEMM_API() {
5048         L3_xGEMM_API(mMatrixS);
5049     }
5050 
test_L3_DGEMM_API()5051     public void test_L3_DGEMM_API() {
5052         L3_xGEMM_API(mMatrixD);
5053     }
5054 
test_L3_CGEMM_API()5055     public void test_L3_CGEMM_API() {
5056         L3_xGEMM_API(mMatrixC);
5057     }
5058 
test_L3_ZGEMM_API()5059     public void test_L3_ZGEMM_API() {
5060         L3_xGEMM_API(mMatrixZ);
5061     }
5062 
5063 
test_L3_SGEMM_Correctness()5064     public void test_L3_SGEMM_Correctness() {
5065         int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5066         int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5067 
5068         // Populate input allocations
5069         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dM));
5070         Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK));
5071         Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
5072         matrixAS.copyFrom(mBLASData.L3_sGEMM_A_mk);
5073         matrixBS.copyFrom(mBLASData.L3_sGEMM_B_kn);
5074         matrixCS.copyFrom(mBLASData.L3_sGEMM_C_mn);
5075 
5076         // Test for the default case: NO_TRANS
5077         mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS);
5078         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
5079         matrixCRef.copyFrom(mBLASData.L3_sGEMM_o_NN);
5080         verifyMatrix(matrixCRef, matrixCS);
5081 
5082         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
5083         matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dK));
5084         matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN));
5085         matrixAS.copyFrom(mBLASData.L3_sGEMM_A_km);
5086         matrixBS.copyFrom(mBLASData.L3_sGEMM_B_nk);
5087 
5088         transA = ScriptIntrinsicBLAS.TRANSPOSE;
5089         transB = ScriptIntrinsicBLAS.TRANSPOSE;
5090         // Reload matrix C, since it was overwritten by BLAS.
5091         matrixCS.copyFrom(mBLASData.L3_sGEMM_C_mn);
5092         mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS);
5093         matrixCRef.copyFrom(mBLASData.L3_sGEMM_o_TT);
5094         verifyMatrix(matrixCRef, matrixCS);
5095 
5096         transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
5097         transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
5098         matrixCS.copyFrom(mBLASData.L3_sGEMM_C_mn);
5099         mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS);
5100         matrixCRef.copyFrom(mBLASData.L3_sGEMM_o_HH);
5101         verifyMatrix(matrixCRef, matrixCS);
5102 
5103         mRS.finish();
5104         checkError();
5105     }
5106 
test_L3_DGEMM_Correctness()5107     public void test_L3_DGEMM_Correctness() {
5108         int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5109         int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5110 
5111         // Populate input allocations
5112         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dM));
5113         Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK));
5114         Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
5115         matrixAD.copyFrom(mBLASData.L3_dGEMM_A_mk);
5116         matrixBD.copyFrom(mBLASData.L3_dGEMM_B_kn);
5117         matrixCD.copyFrom(mBLASData.L3_dGEMM_C_mn);
5118         // Test for the default case: NO_TRANS
5119         mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD);
5120         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
5121         matrixCRef.copyFrom(mBLASData.L3_dGEMM_o_NN);
5122         verifyMatrix(matrixCRef, matrixCD);
5123 
5124         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
5125         matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dK));
5126         matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN));
5127         matrixAD.copyFrom(mBLASData.L3_dGEMM_A_km);
5128         matrixBD.copyFrom(mBLASData.L3_dGEMM_B_nk);
5129 
5130         transA = ScriptIntrinsicBLAS.TRANSPOSE;
5131         transB = ScriptIntrinsicBLAS.TRANSPOSE;
5132         // Reload matrix C, since it was overwritten by BLAS.
5133         matrixCD.copyFrom(mBLASData.L3_dGEMM_C_mn);
5134         mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD);
5135         matrixCRef.copyFrom(mBLASData.L3_dGEMM_o_TT);
5136         verifyMatrix(matrixCRef, matrixCD);
5137 
5138         transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
5139         transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
5140         matrixCD.copyFrom(mBLASData.L3_dGEMM_C_mn);
5141         mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD);
5142         matrixCRef.copyFrom(mBLASData.L3_dGEMM_o_HH);
5143         verifyMatrix(matrixCRef, matrixCD);
5144 
5145         mRS.finish();
5146         checkError();
5147     }
5148 
test_L3_CGEMM_Correctness()5149     public void test_L3_CGEMM_Correctness() {
5150         int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5151         int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5152 
5153         // Populate input allocations
5154         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dM));
5155         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
5156         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
5157         matrixAC.copyFrom(mBLASData.L3_cGEMM_A_mk);
5158         matrixBC.copyFrom(mBLASData.L3_cGEMM_B_kn);
5159         matrixCC.copyFrom(mBLASData.L3_cGEMM_C_mn);
5160 
5161         // Test for the default case: NO_TRANS
5162         mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC);
5163         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
5164         matrixCRef.copyFrom(mBLASData.L3_cGEMM_o_NN);
5165         verifyMatrix(matrixCRef, matrixCC);
5166 
5167         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
5168         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dK));
5169         matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
5170         matrixAC.copyFrom(mBLASData.L3_cGEMM_A_km);
5171         matrixBC.copyFrom(mBLASData.L3_cGEMM_B_nk);
5172 
5173         transA = ScriptIntrinsicBLAS.TRANSPOSE;
5174         transB = ScriptIntrinsicBLAS.TRANSPOSE;
5175         // Reload matrix C, since it was overwritten by BLAS.
5176         matrixCC.copyFrom(mBLASData.L3_cGEMM_C_mn);
5177         mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC);
5178         matrixCRef.copyFrom(mBLASData.L3_cGEMM_o_TT);
5179         verifyMatrix(matrixCRef, matrixCC);
5180 
5181         transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
5182         transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
5183         matrixCC.copyFrom(mBLASData.L3_cGEMM_C_mn);
5184         mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC);
5185         matrixCRef.copyFrom(mBLASData.L3_cGEMM_o_HH);
5186         verifyMatrix(matrixCRef, matrixCC);
5187 
5188         mRS.finish();
5189         checkError();
5190     }
5191 
test_L3_ZGEMM_Correctness()5192     public void test_L3_ZGEMM_Correctness() {
5193         int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5194         int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5195 
5196         // Populate input allocations
5197         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dM));
5198         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
5199         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
5200         matrixAZ.copyFrom(mBLASData.L3_zGEMM_A_mk);
5201         matrixBZ.copyFrom(mBLASData.L3_zGEMM_B_kn);
5202         matrixCZ.copyFrom(mBLASData.L3_zGEMM_C_mn);
5203 
5204         // Test for the default case: NO_TRANS
5205         mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
5206         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
5207         matrixCRef.copyFrom(mBLASData.L3_zGEMM_o_NN);
5208         verifyMatrix(matrixCRef, matrixCZ);
5209 
5210         // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
5211         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dK));
5212         matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
5213         matrixAZ.copyFrom(mBLASData.L3_zGEMM_A_km);
5214         matrixBZ.copyFrom(mBLASData.L3_zGEMM_B_nk);
5215 
5216         transA = ScriptIntrinsicBLAS.TRANSPOSE;
5217         transB = ScriptIntrinsicBLAS.TRANSPOSE;
5218         // Reload matrix C, since it was overwritten by BLAS.
5219         matrixCZ.copyFrom(mBLASData.L3_zGEMM_C_mn);
5220         mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
5221         matrixCRef.copyFrom(mBLASData.L3_zGEMM_o_TT);
5222         verifyMatrix(matrixCRef, matrixCZ);
5223 
5224         transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
5225         transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
5226         matrixCZ.copyFrom(mBLASData.L3_zGEMM_C_mn);
5227         mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
5228         matrixCRef.copyFrom(mBLASData.L3_zGEMM_o_HH);
5229         verifyMatrix(matrixCRef, matrixCZ);
5230 
5231         mRS.finish();
5232         checkError();
5233     }
5234 
5235 
5236 
validateL3_xSYMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C)5237     private boolean validateL3_xSYMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C) {
5238         boolean result = true;
5239         result &= validateSide(Side);
5240         result &= validateUplo(Uplo);
5241         result &= validateL3(e, 0, 0, Side, A, B, C);
5242         result &= (A.getType().getX() == A.getType().getY());
5243         return result;
5244     }
5245 
xSYMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix)5246     private void xSYMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix) {
5247         for (Allocation matA : mMatrix) {
5248             for (Allocation matB : mMatrix) {
5249                 for (Allocation matC : mMatrix) {
5250                     Element elemA = matA.getType().getElement();
5251                     if (validateL3_xSYMM(elemA, Side, Uplo, matA, matB, matC)) {
5252                         try {
5253                             if (elemA.isCompatible(Element.F32(mRS))) {
5254                                 mBLAS.SSYMM(Side, Uplo, alphaS, matA, matB, betaS, matC);
5255                             } else if (elemA.isCompatible(Element.F64(mRS))) {
5256                                 mBLAS.DSYMM(Side, Uplo, alphaD, matA, matB, betaD, matC);
5257                             } else if (elemA.isCompatible(Element.F32_2(mRS))) {
5258                                 mBLAS.CSYMM(Side, Uplo, alphaC, matA, matB, betaC, matC);
5259                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
5260                                 mBLAS.ZSYMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC);
5261                             }
5262                         } catch (RSRuntimeException e) {
5263                             fail("should NOT throw RSRuntimeException");
5264                         }
5265                     } else {
5266                         try {
5267                             mBLAS.SSYMM(Side, Uplo, alphaS, matA, matB, betaS, matC);
5268                             fail("should throw RSRuntimeException for SSYMM");
5269                         } catch (RSRuntimeException e) {
5270                         }
5271                         try {
5272                             mBLAS.DSYMM(Side, Uplo, alphaD, matA, matB, betaD, matC);
5273                             fail("should throw RSRuntimeException for DSYMM");
5274                         } catch (RSRuntimeException e) {
5275                         }
5276                         try {
5277                             mBLAS.CSYMM(Side, Uplo, alphaC, matA, matB, betaC, matC);
5278                             fail("should throw RSRuntimeException for CSYMM");
5279                         } catch (RSRuntimeException e) {
5280                         }
5281                         try {
5282                             mBLAS.ZSYMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC);
5283                             fail("should throw RSRuntimeException for ZSYMM");
5284                         } catch (RSRuntimeException e) {
5285                         }
5286                     }
5287                 }
5288             }
5289         }
5290     }
5291 
L3_xSYMM_API(ArrayList<Allocation> mMatrix)5292     private void L3_xSYMM_API(ArrayList<Allocation> mMatrix) {
5293         for (int Side : mSide) {
5294             for (int Uplo : mUplo) {
5295                 xSYMM_API_test(Side, Uplo, mMatrix);
5296             }
5297         }
5298     }
5299 
test_L3_SSYMM_API()5300     public void test_L3_SSYMM_API() {
5301         L3_xSYMM_API(mMatrixS);
5302     }
5303 
test_L3_DSYMM_API()5304     public void test_L3_DSYMM_API() {
5305         L3_xSYMM_API(mMatrixD);
5306     }
5307 
test_L3_CSYMM_API()5308     public void test_L3_CSYMM_API() {
5309         L3_xSYMM_API(mMatrixC);
5310     }
5311 
test_L3_ZSYMM_API()5312     public void test_L3_ZSYMM_API() {
5313         L3_xSYMM_API(mMatrixZ);
5314     }
5315 
5316 
test_L3_SSYMM_Correctness()5317     public void test_L3_SSYMM_Correctness() {
5318         int side = ScriptIntrinsicBLAS.LEFT;
5319         int uplo = ScriptIntrinsicBLAS.UPPER;
5320 
5321         // Populate input allocations
5322         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dM));
5323         Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
5324         Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
5325         matrixAS.copyFrom(mBLASData.L3_sSYMM_A_mm);
5326         matrixBS.copyFrom(mBLASData.L3_sSYMM_B_mn);
5327         matrixCS.copyFrom(mBLASData.L3_sSYMM_C_mn);
5328 
5329         // Default case: SIDE = LEFT
5330         mBLAS.SSYMM(side, uplo, alphaS, matrixAS, matrixBS, betaS, matrixCS);
5331         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
5332         matrixCRef.copyFrom(mBLASData.L3_sSYMM_o_L);
5333         verifyMatrix(matrixCRef, matrixCS);
5334 
5335         // SIDE = RIGHT
5336         side = ScriptIntrinsicBLAS.RIGHT;
5337         matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
5338         matrixAS.copyFrom(mBLASData.L3_sSYMM_A_nn);
5339         // Reload matrix C, since it was overwritten by BLAS.
5340         matrixCS.copyFrom(mBLASData.L3_sSYMM_C_mn);
5341         mBLAS.SSYMM(side, uplo, alphaS, matrixAS, matrixBS, betaS, matrixCS);
5342         matrixCRef.copyFrom(mBLASData.L3_sSYMM_o_R);
5343         verifyMatrix(matrixCRef, matrixCS);
5344 
5345         mRS.finish();
5346         checkError();
5347     }
5348 
test_L3_DSYMM_Correctness()5349     public void test_L3_DSYMM_Correctness() {
5350         int side = ScriptIntrinsicBLAS.LEFT;
5351         int uplo = ScriptIntrinsicBLAS.UPPER;
5352 
5353         // Populate input allocations
5354         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dM));
5355         Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
5356         Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
5357         matrixAD.copyFrom(mBLASData.L3_dSYMM_A_mm);
5358         matrixBD.copyFrom(mBLASData.L3_dSYMM_B_mn);
5359         matrixCD.copyFrom(mBLASData.L3_dSYMM_C_mn);
5360 
5361         // Default case: SIDE = LEFT
5362         mBLAS.DSYMM(side, uplo, alphaD, matrixAD, matrixBD, betaD, matrixCD);
5363         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
5364         matrixCRef.copyFrom(mBLASData.L3_dSYMM_o_L);
5365         verifyMatrix(matrixCRef, matrixCD);
5366 
5367         // SIDE = RIGHT
5368         side = ScriptIntrinsicBLAS.RIGHT;
5369         matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
5370         matrixAD.copyFrom(mBLASData.L3_dSYMM_A_nn);
5371         // Reload matrix C, since it was overwritten by BLAS.
5372         matrixCD.copyFrom(mBLASData.L3_dSYMM_C_mn);
5373         mBLAS.DSYMM(side, uplo, alphaD, matrixAD, matrixBD, betaD, matrixCD);
5374         matrixCRef.copyFrom(mBLASData.L3_dSYMM_o_R);
5375         verifyMatrix(matrixCRef, matrixCD);
5376 
5377         mRS.finish();
5378         checkError();
5379     }
5380 
test_L3_CSYMM_Correctness()5381     public void test_L3_CSYMM_Correctness() {
5382         int side = ScriptIntrinsicBLAS.LEFT;
5383         int uplo = ScriptIntrinsicBLAS.UPPER;
5384 
5385         // Populate input allocations
5386         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM));
5387         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
5388         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
5389         matrixAC.copyFrom(mBLASData.L3_cSYMM_A_mm);
5390         matrixBC.copyFrom(mBLASData.L3_cSYMM_B_mn);
5391         matrixCC.copyFrom(mBLASData.L3_cSYMM_C_mn);
5392 
5393         // Default case: SIDE = LEFT
5394         mBLAS.CSYMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
5395         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
5396         matrixCRef.copyFrom(mBLASData.L3_cSYMM_o_L);
5397         verifyMatrix(matrixCRef, matrixCC);
5398 
5399         // SIDE = RIGHT
5400         side = ScriptIntrinsicBLAS.RIGHT;
5401         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
5402         matrixAC.copyFrom(mBLASData.L3_cSYMM_A_nn);
5403         // Reload matrix C, since it was overwritten by BLAS.
5404         matrixCC.copyFrom(mBLASData.L3_cSYMM_C_mn);
5405         mBLAS.CSYMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
5406         matrixCRef.copyFrom(mBLASData.L3_cSYMM_o_R);
5407         verifyMatrix(matrixCRef, matrixCC);
5408 
5409         mRS.finish();
5410         checkError();
5411     }
5412 
test_L3_ZSYMM_Correctness()5413     public void test_L3_ZSYMM_Correctness() {
5414         int side = ScriptIntrinsicBLAS.LEFT;
5415         int uplo = ScriptIntrinsicBLAS.UPPER;
5416 
5417         // Populate input allocations
5418         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM));
5419         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
5420         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
5421         matrixAZ.copyFrom(mBLASData.L3_zSYMM_A_mm);
5422         matrixBZ.copyFrom(mBLASData.L3_zSYMM_B_mn);
5423         matrixCZ.copyFrom(mBLASData.L3_zSYMM_C_mn);
5424 
5425         // Default case: SIDE = LEFT
5426         mBLAS.ZSYMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
5427         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
5428         matrixCRef.copyFrom(mBLASData.L3_zSYMM_o_L);
5429         verifyMatrix(matrixCRef, matrixCZ);
5430 
5431         // SIDE = RIGHT
5432         side = ScriptIntrinsicBLAS.RIGHT;
5433         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
5434         matrixAZ.copyFrom(mBLASData.L3_zSYMM_A_nn);
5435         // Reload matrix C, since it was overwritten by BLAS.
5436         matrixCZ.copyFrom(mBLASData.L3_zSYMM_C_mn);
5437         mBLAS.ZSYMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
5438         matrixCRef.copyFrom(mBLASData.L3_zSYMM_o_R);
5439         verifyMatrix(matrixCRef, matrixCZ);
5440 
5441         mRS.finish();
5442         checkError();
5443     }
5444 
5445 
validateHEMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C)5446     private boolean validateHEMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C) {
5447         if (!validateSide(Side)) {
5448             return false;
5449         }
5450 
5451         if (!validateUplo(Uplo)) {
5452             return false;
5453         }
5454 
5455         if (!A.getType().getElement().isCompatible(e) ||
5456             !B.getType().getElement().isCompatible(e) ||
5457             !C.getType().getElement().isCompatible(e)) {
5458             return false;
5459         }
5460 
5461         // A must be square; can potentially be relaxed similar to TRSM
5462         int adim = A.getType().getX();
5463         if (adim != A.getType().getY()) {
5464             return false;
5465         }
5466         if ((Side == ScriptIntrinsicBLAS.LEFT && adim != B.getType().getY()) ||
5467             (Side == ScriptIntrinsicBLAS.RIGHT && adim != B.getType().getX())) {
5468             return false;
5469         }
5470         if (B.getType().getX() != C.getType().getX() ||
5471             B.getType().getY() != C.getType().getY()) {
5472             return false;
5473         }
5474 
5475         return true;
5476     }
5477 
xHEMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix)5478     private void xHEMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix) {
5479         for (Allocation matA : mMatrix) {
5480             for (Allocation matB : mMatrix) {
5481                 for (Allocation matC : mMatrix) {
5482                     Element elemA = matA.getType().getElement();
5483                     if (validateHEMM(elemA, Side, Uplo, matA, matB, matC)) {
5484                         try {
5485                             if (elemA.isCompatible(Element.F32_2(mRS))) {
5486                                 mBLAS.CHEMM(Side, Uplo, alphaC, matA, matB, betaC, matC);
5487                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
5488                                 mBLAS.ZHEMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC);
5489                             }
5490                         } catch (RSRuntimeException e) {
5491                             fail("should NOT throw RSRuntimeException");
5492                         }
5493                     } else {
5494                         try {
5495                             mBLAS.CHEMM(Side, Uplo, alphaC, matA, matB, betaC, matC);
5496                             fail("should throw RSRuntimeException for CHEMM");
5497                         } catch (RSRuntimeException e) {
5498                         }
5499                         try {
5500                             mBLAS.ZHEMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC);
5501                             fail("should throw RSRuntimeException for ZHEMM");
5502                         } catch (RSRuntimeException e) {
5503                         }
5504                     }
5505                 }
5506             }
5507         }
5508     }
5509 
L3_xHEMM_API(ArrayList<Allocation> mMatrix)5510     public void L3_xHEMM_API(ArrayList<Allocation> mMatrix) {
5511         for (int Side : mSide) {
5512             for (int Uplo : mUplo) {
5513                 xHEMM_API_test(Side, Uplo, mMatrix);
5514             }
5515         }
5516     }
5517 
test_L3_CHEMM_API()5518     public void test_L3_CHEMM_API() {
5519         L3_xHEMM_API(mMatrixC);
5520     }
5521 
test_L3_ZHEMM_API()5522     public void test_L3_ZHEMM_API() {
5523         L3_xHEMM_API(mMatrixZ);
5524     }
5525 
test_L3_CHEMM_Correctness()5526     public void test_L3_CHEMM_Correctness() {
5527         int side = ScriptIntrinsicBLAS.LEFT;
5528         int uplo = ScriptIntrinsicBLAS.UPPER;
5529 
5530         // Populate input allocations
5531         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM));
5532         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
5533         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
5534         matrixAC.copyFrom(mBLASData.L3_cHEMM_A_mm);
5535         matrixBC.copyFrom(mBLASData.L3_cHEMM_B_mn);
5536         matrixCC.copyFrom(mBLASData.L3_cHEMM_C_mn);
5537 
5538         // Default case: SIDE = LEFT
5539         mBLAS.CHEMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
5540         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
5541         matrixCRef.copyFrom(mBLASData.L3_cHEMM_o_L);
5542         verifyMatrix(matrixCRef, matrixCC);
5543 
5544         // SIDE = RIGHT
5545         side = ScriptIntrinsicBLAS.RIGHT;
5546         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
5547         matrixAC.copyFrom(mBLASData.L3_cHEMM_A_nn);
5548         // Reload matrix C, since it was overwritten by BLAS.
5549         matrixCC.copyFrom(mBLASData.L3_cHEMM_C_mn);
5550         mBLAS.CHEMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
5551         matrixCRef.copyFrom(mBLASData.L3_cHEMM_o_R);
5552         verifyMatrix(matrixCRef, matrixCC);
5553 
5554         mRS.finish();
5555         checkError();
5556     }
5557 
test_L3_ZHEMM_Correctness()5558     public void test_L3_ZHEMM_Correctness() {
5559         int side = ScriptIntrinsicBLAS.LEFT;
5560         int uplo = ScriptIntrinsicBLAS.UPPER;
5561 
5562         // Populate input allocations
5563         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM));
5564         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
5565         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
5566         matrixAZ.copyFrom(mBLASData.L3_zHEMM_A_mm);
5567         matrixBZ.copyFrom(mBLASData.L3_zHEMM_B_mn);
5568         matrixCZ.copyFrom(mBLASData.L3_zHEMM_C_mn);
5569 
5570         // Default case: SIDE = LEFT
5571         mBLAS.ZHEMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
5572         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
5573         matrixCRef.copyFrom(mBLASData.L3_zHEMM_o_L);
5574         verifyMatrix(matrixCRef, matrixCZ);
5575 
5576         // SIDE = RIGHT
5577         side = ScriptIntrinsicBLAS.RIGHT;
5578         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
5579         matrixAZ.copyFrom(mBLASData.L3_zHEMM_A_nn);
5580         // Reload matrix C, since it was overwritten by BLAS.
5581         matrixCZ.copyFrom(mBLASData.L3_zHEMM_C_mn);
5582         mBLAS.ZHEMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
5583         matrixCRef.copyFrom(mBLASData.L3_zHEMM_o_R);
5584         verifyMatrix(matrixCRef, matrixCZ);
5585 
5586         mRS.finish();
5587         checkError();
5588     }
5589 
5590 
5591 
validateL3_xSYRK(Element e, int Uplo, int Trans, Allocation A, Allocation C)5592     private boolean validateL3_xSYRK(Element e, int Uplo, int Trans, Allocation A, Allocation C) {
5593         boolean result = true;
5594         result &= validateTranspose(Trans);
5595         result &= validateUplo(Uplo);
5596         result &= validateL3(e, Trans, 0, 0, A, null, C);
5597 
5598         return result;
5599     }
5600 
xSYRK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix)5601     private void xSYRK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) {
5602         for (Allocation matA : mMatrix) {
5603             for (Allocation matC : mMatrix) {
5604                 Element elemA = matA.getType().getElement();
5605                 if (validateL3_xSYRK(elemA, Uplo, Trans, matA, matC)) {
5606                     try {
5607                         if (elemA.isCompatible(Element.F32(mRS))) {
5608                             mBLAS.SSYRK(Uplo, Trans, alphaS, matA, betaS, matC);
5609                         } else if (elemA.isCompatible(Element.F64(mRS))) {
5610                             mBLAS.DSYRK(Uplo, Trans, alphaD, matA, betaD, matC);
5611                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
5612                             mBLAS.CSYRK(Uplo, Trans, alphaC, matA, betaC, matC);
5613                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
5614                             mBLAS.ZSYRK(Uplo, Trans, alphaZ, matA, betaZ, matC);
5615                         }
5616                     } catch (RSRuntimeException e) {
5617                         fail("should NOT throw RSRuntimeException");
5618                     }
5619                 } else {
5620                     try {
5621                         mBLAS.SSYRK(Uplo, Trans, alphaS, matA, betaS, matC);
5622                         fail("should throw RSRuntimeException for SSYRK");
5623                     } catch (RSRuntimeException e) {
5624                     }
5625                     try {
5626                         mBLAS.DSYRK(Uplo, Trans, alphaD, matA, betaD, matC);
5627                         fail("should throw RSRuntimeException for DSYRK");
5628                     } catch (RSRuntimeException e) {
5629                     }
5630                     try {
5631                         mBLAS.CSYRK(Uplo, Trans, alphaC, matA, betaC, matC);
5632                         fail("should throw RSRuntimeException for CSYRK");
5633                     } catch (RSRuntimeException e) {
5634                     }
5635                     try {
5636                         mBLAS.ZSYRK(Uplo, Trans, alphaZ, matA, betaZ, matC);
5637                         fail("should throw RSRuntimeException for ZSYRK");
5638                     } catch (RSRuntimeException e) {
5639                     }
5640                 }
5641             }
5642         }
5643     }
5644 
L3_xSYRK_API(ArrayList<Allocation> mMatrix)5645     public void L3_xSYRK_API(ArrayList<Allocation> mMatrix) {
5646         for (int Uplo : mUplo) {
5647             for (int Trans : mTranspose) {
5648                 xSYRK_API_test(Uplo, Trans, mMatrix);
5649             }
5650         }
5651     }
5652 
test_L3_SSYRK_API()5653     public void test_L3_SSYRK_API() {
5654         L3_xSYRK_API(mMatrixS);
5655     }
5656 
test_L3_DSYRK_API()5657     public void test_L3_DSYRK_API() {
5658         L3_xSYRK_API(mMatrixD);
5659     }
5660 
test_L3_CSYRK_API()5661     public void test_L3_CSYRK_API() {
5662         L3_xSYRK_API(mMatrixC);
5663     }
5664 
test_L3_ZSYRK_API()5665     public void test_L3_ZSYRK_API() {
5666         L3_xSYRK_API(mMatrixZ);
5667     }
5668 
5669 
test_L3_SSYRK_Correctness()5670     public void test_L3_SSYRK_Correctness() {
5671         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5672         int uplo = ScriptIntrinsicBLAS.UPPER;
5673 
5674         // Populate input allocations
5675         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN));
5676         Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
5677         matrixAS.copyFrom(mBLASData.L3_sSYRK_A_nk);
5678         matrixCS.copyFrom(mBLASData.L3_sSYRK_C_nn);
5679 
5680         // Default case: NO_TRANSPOSE
5681         mBLAS.SSYRK(uplo, trans, alphaS, matrixAS, betaS, matrixCS);
5682         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
5683         matrixCRef.copyFrom(mBLASData.L3_sSYRK_o_N);
5684         verifyMatrix(matrixCRef, matrixCS, true);
5685 
5686         // Case: TRANSPOSE
5687         matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK));
5688         matrixAS.copyFrom(mBLASData.L3_sSYRK_A_kn);
5689         // Reload matrix C, since it was overwritten by BLAS.
5690         matrixCS.copyFrom(mBLASData.L3_sSYRK_C_nn);
5691 
5692         trans = ScriptIntrinsicBLAS.TRANSPOSE;
5693         mBLAS.SSYRK(uplo, trans, alphaS, matrixAS, betaS, matrixCS);
5694         matrixCRef.copyFrom(mBLASData.L3_sSYRK_o_T);
5695         verifyMatrix(matrixCRef, matrixCS, true);
5696 
5697         mRS.finish();
5698         checkError();
5699     }
5700 
test_L3_DSYRK_Correctness()5701     public void test_L3_DSYRK_Correctness() {
5702         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5703         int uplo = ScriptIntrinsicBLAS.UPPER;
5704 
5705         // Populate input allocations
5706         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN));
5707         Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
5708         matrixAD.copyFrom(mBLASData.L3_dSYRK_A_nk);
5709         matrixCD.copyFrom(mBLASData.L3_dSYRK_C_nn);
5710 
5711         // Default case: NO_TRANSPOSE
5712         mBLAS.DSYRK(uplo, trans, alphaD, matrixAD, betaD, matrixCD);
5713         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
5714         matrixCRef.copyFrom(mBLASData.L3_dSYRK_o_N);
5715         verifyMatrix(matrixCRef, matrixCD, true);
5716 
5717         // Case: TRANSPOSE
5718         matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK));
5719         matrixAD.copyFrom(mBLASData.L3_dSYRK_A_kn);
5720         // Reload matrix C, since it was overwritten by BLAS.
5721         matrixCD.copyFrom(mBLASData.L3_dSYRK_C_nn);
5722 
5723         trans = ScriptIntrinsicBLAS.TRANSPOSE;
5724         mBLAS.DSYRK(uplo, trans, alphaD, matrixAD, betaD, matrixCD);
5725         matrixCRef.copyFrom(mBLASData.L3_dSYRK_o_T);
5726         verifyMatrix(matrixCRef, matrixCD, true);
5727 
5728         mRS.finish();
5729         checkError();
5730     }
5731 
test_L3_CSYRK_Correctness()5732     public void test_L3_CSYRK_Correctness() {
5733         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5734         int uplo = ScriptIntrinsicBLAS.UPPER;
5735 
5736         // Populate input allocations
5737         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
5738         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
5739         matrixAC.copyFrom(mBLASData.L3_cSYRK_A_nk);
5740         matrixCC.copyFrom(mBLASData.L3_cSYRK_C_nn);
5741 
5742         // Default case: NO_TRANSPOSE
5743         mBLAS.CSYRK(uplo, trans, alphaC, matrixAC, betaC, matrixCC);
5744         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
5745         matrixCRef.copyFrom(mBLASData.L3_cSYRK_o_N);
5746         verifyMatrix(matrixCRef, matrixCC, true);
5747 
5748         // Case: TRANSPOSE
5749         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
5750         matrixAC.copyFrom(mBLASData.L3_cSYRK_A_kn);
5751         // Reload matrix C, since it was overwritten by BLAS.
5752         matrixCC.copyFrom(mBLASData.L3_cSYRK_C_nn);
5753 
5754         trans = ScriptIntrinsicBLAS.TRANSPOSE;
5755         mBLAS.CSYRK(uplo, trans, alphaC, matrixAC, betaC, matrixCC);
5756         matrixCRef.copyFrom(mBLASData.L3_cSYRK_o_T);
5757         verifyMatrix(matrixCRef, matrixCC, true);
5758 
5759         mRS.finish();
5760         checkError();
5761     }
5762 
test_L3_ZSYRK_Correctness()5763     public void test_L3_ZSYRK_Correctness() {
5764         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5765         int uplo = ScriptIntrinsicBLAS.UPPER;
5766 
5767         // Populate input allocations
5768         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
5769         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
5770         matrixAZ.copyFrom(mBLASData.L3_zSYRK_A_nk);
5771         matrixCZ.copyFrom(mBLASData.L3_zSYRK_C_nn);
5772 
5773         // Default case: NO_TRANSPOSE
5774         mBLAS.ZSYRK(uplo, trans, alphaZ, matrixAZ, betaZ, matrixCZ);
5775         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
5776         matrixCRef.copyFrom(mBLASData.L3_zSYRK_o_N);
5777         verifyMatrix(matrixCRef, matrixCZ, true);
5778 
5779         // Case: TRANSPOSE
5780         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
5781         matrixAZ.copyFrom(mBLASData.L3_zSYRK_A_kn);
5782         // Reload matrix C, since it was overwritten by BLAS.
5783         matrixCZ.copyFrom(mBLASData.L3_zSYRK_C_nn);
5784 
5785         trans = ScriptIntrinsicBLAS.TRANSPOSE;
5786         mBLAS.ZSYRK(uplo, trans, alphaZ, matrixAZ, betaZ, matrixCZ);
5787         matrixCRef.copyFrom(mBLASData.L3_zSYRK_o_T);
5788         verifyMatrix(matrixCRef, matrixCZ, true);
5789 
5790         mRS.finish();
5791         checkError();
5792     }
5793 
5794 
validateHERK(Element e, int Uplo, int Trans, Allocation A, Allocation C)5795     private boolean validateHERK(Element e, int Uplo, int Trans, Allocation A, Allocation C) {
5796         if (!validateUplo(Uplo)) {
5797             return false;
5798         }
5799         if (!A.getType().getElement().isCompatible(e) ||
5800             !C.getType().getElement().isCompatible(e)) {
5801             return false;
5802         }
5803         if (!validateConjTranspose(Trans)) {
5804             return false;
5805         }
5806         int cdim = C.getType().getX();
5807         if (cdim != C.getType().getY()) {
5808             return false;
5809         }
5810         if (Trans == ScriptIntrinsicBLAS.NO_TRANSPOSE) {
5811             if (cdim != A.getType().getY()) {
5812                 return false;
5813             }
5814         } else {
5815             if (cdim != A.getType().getX()) {
5816                 return false;
5817             }
5818         }
5819         return true;
5820     }
5821 
xHERK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix)5822     private void xHERK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) {
5823         for (Allocation matA : mMatrix) {
5824             for (Allocation matC : mMatrix) {
5825                 Element elemA = matA.getType().getElement();
5826                 if (validateHERK(elemA, Uplo, Trans, matA, matC)) {
5827                     try {
5828                         if (elemA.isCompatible(Element.F32_2(mRS))) {
5829                             mBLAS.CHERK(Uplo, Trans, alphaS, matA, betaS, matC);
5830                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
5831                             mBLAS.ZHERK(Uplo, Trans, alphaD, matA, betaD, matC);
5832                         }
5833                     } catch (RSRuntimeException e) {
5834                         fail("should NOT throw RSRuntimeException");
5835                     }
5836                 } else {
5837                     try {
5838                         mBLAS.CHERK(Uplo, Trans, alphaS, matA, betaS, matC);
5839                         fail("should throw RSRuntimeException for CHERK");
5840                     } catch (RSRuntimeException e) {
5841                     }
5842                     try {
5843                         mBLAS.ZHERK(Uplo, Trans, alphaD, matA, betaD, matC);
5844                         fail("should throw RSRuntimeException for ZHERK");
5845                     } catch (RSRuntimeException e) {
5846                     }
5847                 }
5848             }
5849         }
5850     }
5851 
L3_xHERK_API(ArrayList<Allocation> mMatrix)5852     public void L3_xHERK_API(ArrayList<Allocation> mMatrix) {
5853         for (int Uplo : mUplo) {
5854             for (int Trans : mTranspose) {
5855                 xHERK_API_test(Uplo, Trans, mMatrix);
5856             }
5857         }
5858     }
5859 
test_L3_CHERK_API()5860     public void test_L3_CHERK_API() {
5861         L3_xHERK_API(mMatrixC);
5862     }
5863 
test_L3_ZHERK_API()5864     public void test_L3_ZHERK_API() {
5865         L3_xHERK_API(mMatrixZ);
5866     }
5867 
test_L3_CHERK_Correctness()5868     public void test_L3_CHERK_Correctness() {
5869         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5870         int uplo = ScriptIntrinsicBLAS.UPPER;
5871 
5872         // Populate input allocations
5873         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
5874         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
5875         matrixAC.copyFrom(mBLASData.L3_cHERK_A_nk);
5876         matrixCC.copyFrom(mBLASData.L3_cHERK_C_nn);
5877 
5878         // Default case: NO_TRANSPOSE
5879         mBLAS.CHERK(uplo, trans, alphaS, matrixAC, betaS, matrixCC);
5880         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
5881         matrixCRef.copyFrom(mBLASData.L3_cHERK_o_N);
5882         verifyMatrix(matrixCRef, matrixCC, true);
5883 
5884         // Case: TRANSPOSE
5885         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
5886         matrixAC.copyFrom(mBLASData.L3_cHERK_A_kn);
5887         // Reload matrix C, since it was overwritten by BLAS.
5888         matrixCC.copyFrom(mBLASData.L3_cHERK_C_nn);
5889 
5890         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
5891         mBLAS.CHERK(uplo, trans, alphaS, matrixAC, betaS, matrixCC);
5892         matrixCRef.copyFrom(mBLASData.L3_cHERK_o_H);
5893         verifyMatrix(matrixCRef, matrixCC, true);
5894 
5895         mRS.finish();
5896         checkError();
5897     }
5898 
test_L3_ZHERK_Correctness()5899     public void test_L3_ZHERK_Correctness() {
5900         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
5901         int uplo = ScriptIntrinsicBLAS.UPPER;
5902 
5903         // Populate input allocations
5904         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
5905         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
5906         matrixAZ.copyFrom(mBLASData.L3_zHERK_A_nk);
5907         matrixCZ.copyFrom(mBLASData.L3_zHERK_C_nn);
5908 
5909         // Default case: NO_TRANSPOSE
5910         mBLAS.ZHERK(uplo, trans, alphaD, matrixAZ, betaD, matrixCZ);
5911         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
5912         matrixCRef.copyFrom(mBLASData.L3_zHERK_o_N);
5913         verifyMatrix(matrixCRef, matrixCZ, true);
5914 
5915         // Case: TRANSPOSE
5916         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
5917         matrixAZ.copyFrom(mBLASData.L3_zHERK_A_kn);
5918         // Reload matrix C, since it was overwritten by BLAS.
5919         matrixCZ.copyFrom(mBLASData.L3_zHERK_C_nn);
5920 
5921         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
5922         mBLAS.ZHERK(uplo, trans, alphaD, matrixAZ, betaD, matrixCZ);
5923         matrixCRef.copyFrom(mBLASData.L3_zHERK_o_H);
5924         verifyMatrix(matrixCRef, matrixCZ, true);
5925 
5926         mRS.finish();
5927         checkError();
5928     }
5929 
5930 
validateSYR2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C)5931     private boolean validateSYR2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C) {
5932         if (!validateTranspose(Trans)) {
5933             return false;
5934         }
5935         if (!validateUplo(Uplo)) {
5936             return false;
5937         }
5938 
5939         if (!A.getType().getElement().isCompatible(e) ||
5940             !B.getType().getElement().isCompatible(e) ||
5941             !C.getType().getElement().isCompatible(e)) {
5942             return false;
5943         }
5944         int Cdim = -1;
5945         // A is n x k if no transpose, k x n if transpose
5946         // C is n x n
5947         if (Trans == ScriptIntrinsicBLAS.TRANSPOSE) {
5948             // check columns versus C
5949             Cdim = A.getType().getX();
5950         } else {
5951             // check rows versus C
5952             Cdim = A.getType().getY();
5953         }
5954         if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {
5955             return false;
5956         }
5957         // A dims == B dims
5958         if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
5959             return false;
5960         }
5961         return true;
5962     }
5963 
xSYR2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix)5964     private void xSYR2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) {
5965         for (Allocation matA : mMatrix) {
5966             for (Allocation matB : mMatrix) {
5967                 for (Allocation matC : mMatrix) {
5968                     Element elemA = matA.getType().getElement();
5969                     if (validateSYR2K(elemA, Uplo, Trans, matA, matB, matC)) {
5970                         try {
5971                             if (elemA.isCompatible(Element.F32(mRS))) {
5972                                 mBLAS.SSYR2K(Uplo, Trans, alphaS, matA, matB, betaS, matC);
5973                             } else if (elemA.isCompatible(Element.F64(mRS))) {
5974                                 mBLAS.DSYR2K(Uplo, Trans, alphaD, matA, matB, betaD, matC);
5975                             } else if (elemA.isCompatible(Element.F32_2(mRS))) {
5976                                 mBLAS.CSYR2K(Uplo, Trans, alphaC, matA, matB, betaC, matC);
5977                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
5978                                 mBLAS.ZSYR2K(Uplo, Trans, alphaZ, matA, matB, betaZ, matC);
5979                             }
5980                         } catch (RSRuntimeException e) {
5981                             fail("should NOT throw RSRuntimeException");
5982                         }
5983                     } else {
5984                         try {
5985                             mBLAS.SSYR2K(Uplo, Trans, alphaS, matA, matB, betaS, matC);
5986                             fail("should throw RSRuntimeException for SSYR2K");
5987                         } catch (RSRuntimeException e) {
5988                         }
5989                         try {
5990                             mBLAS.DSYR2K(Uplo, Trans, alphaD, matA, matB, betaD, matC);
5991                             fail("should throw RSRuntimeException for DSYR2K");
5992                         } catch (RSRuntimeException e) {
5993                         }
5994                         try {
5995                             mBLAS.CSYR2K(Uplo, Trans, alphaC, matA, matB, betaC, matC);
5996                             fail("should throw RSRuntimeException for CSYR2K");
5997                         } catch (RSRuntimeException e) {
5998                         }
5999                         try {
6000                             mBLAS.ZSYR2K(Uplo, Trans, alphaZ, matA, matB, betaZ, matC);
6001                             fail("should throw RSRuntimeException for ZSYR2K");
6002                         } catch (RSRuntimeException e) {
6003                         }
6004                     }
6005                 }
6006             }
6007         }
6008     }
6009 
L3_xSYR2K_API(ArrayList<Allocation> mMatrix)6010     public void L3_xSYR2K_API(ArrayList<Allocation> mMatrix) {
6011         for (int Uplo : mUplo) {
6012             for (int Trans : mTranspose) {
6013                 xSYR2K_API_test(Uplo, Trans, mMatrix);
6014             }
6015         }
6016     }
6017 
test_L3_SSYR2K_API()6018     public void test_L3_SSYR2K_API() {
6019         L3_xSYR2K_API(mMatrixS);
6020     }
6021 
test_L3_DSYR2K_API()6022     public void test_L3_DSYR2K_API() {
6023         L3_xSYR2K_API(mMatrixD);
6024     }
6025 
test_L3_CSYR2K_API()6026     public void test_L3_CSYR2K_API() {
6027         L3_xSYR2K_API(mMatrixC);
6028     }
6029 
test_L3_ZSYR2K_API()6030     public void test_L3_ZSYR2K_API() {
6031         L3_xSYR2K_API(mMatrixZ);
6032     }
6033 
6034 
test_L3_SSYR2K_Correctness()6035     public void test_L3_SSYR2K_Correctness() {
6036         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6037         int uplo = ScriptIntrinsicBLAS.UPPER;
6038 
6039         // Populate input allocations
6040         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN));
6041         Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN));
6042         Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
6043         matrixAS.copyFrom(mBLASData.L3_sSYR2K_A_nk);
6044         matrixBS.copyFrom(mBLASData.L3_sSYR2K_B_nk);
6045         matrixCS.copyFrom(mBLASData.L3_sSYR2K_C_nn);
6046 
6047         // Default case: NO_TRANSPOSE
6048         mBLAS.SSYR2K(uplo, trans, alphaS, matrixAS, matrixBS, betaS, matrixCS);
6049         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
6050         matrixCRef.copyFrom(mBLASData.L3_sSYR2K_o_N);
6051         verifyMatrix(matrixCRef, matrixCS, true);
6052 
6053         // Case: TRANSPOSE
6054         matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK));
6055         matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK));
6056         matrixAS.copyFrom(mBLASData.L3_sSYR2K_A_kn);
6057         matrixBS.copyFrom(mBLASData.L3_sSYR2K_B_kn);
6058         // Reload matrix C, since it was overwritten by BLAS.
6059         matrixCS.copyFrom(mBLASData.L3_sSYR2K_C_nn);
6060 
6061         trans = ScriptIntrinsicBLAS.TRANSPOSE;
6062         mBLAS.SSYR2K(uplo, trans, alphaS, matrixAS, matrixBS, betaS, matrixCS);
6063         matrixCRef.copyFrom(mBLASData.L3_sSYR2K_o_T);
6064         verifyMatrix(matrixCRef, matrixCS, true);
6065 
6066         mRS.finish();
6067         checkError();
6068     }
6069 
test_L3_DSYR2K_Correctness()6070     public void test_L3_DSYR2K_Correctness() {
6071         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6072         int uplo = ScriptIntrinsicBLAS.UPPER;
6073 
6074         // Populate input allocations
6075         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN));
6076         Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN));
6077         Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
6078         matrixAD.copyFrom(mBLASData.L3_dSYR2K_A_nk);
6079         matrixBD.copyFrom(mBLASData.L3_dSYR2K_B_nk);
6080         matrixCD.copyFrom(mBLASData.L3_dSYR2K_C_nn);
6081 
6082         // Default case: NO_TRANSPOSE
6083         mBLAS.DSYR2K(uplo, trans, alphaD, matrixAD, matrixBD, betaD, matrixCD);
6084         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
6085         matrixCRef.copyFrom(mBLASData.L3_dSYR2K_o_N);
6086         verifyMatrix(matrixCRef, matrixCD, true);
6087 
6088         // Case: TRANSPOSE
6089         matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK));
6090         matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK));
6091         matrixAD.copyFrom(mBLASData.L3_dSYR2K_A_kn);
6092         matrixBD.copyFrom(mBLASData.L3_dSYR2K_B_kn);
6093         // Reload matrix C, since it was overwritten by BLAS.
6094         matrixCD.copyFrom(mBLASData.L3_dSYR2K_C_nn);
6095 
6096         trans = ScriptIntrinsicBLAS.TRANSPOSE;
6097         mBLAS.DSYR2K(uplo, trans, alphaD, matrixAD, matrixBD, betaD, matrixCD);
6098         matrixCRef.copyFrom(mBLASData.L3_dSYR2K_o_T);
6099         verifyMatrix(matrixCRef, matrixCD, true);
6100 
6101         mRS.finish();
6102         checkError();
6103     }
6104 
test_L3_CSYR2K_Correctness()6105     public void test_L3_CSYR2K_Correctness() {
6106         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6107         int uplo = ScriptIntrinsicBLAS.UPPER;
6108 
6109         // Populate input allocations
6110         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
6111         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
6112         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
6113         matrixAC.copyFrom(mBLASData.L3_cSYR2K_A_nk);
6114         matrixBC.copyFrom(mBLASData.L3_cSYR2K_B_nk);
6115         matrixCC.copyFrom(mBLASData.L3_cSYR2K_C_nn);
6116 
6117         // Default case: NO_TRANSPOSE
6118         mBLAS.CSYR2K(uplo, trans, alphaC, matrixAC, matrixBC, betaC, matrixCC);
6119         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
6120         matrixCRef.copyFrom(mBLASData.L3_cSYR2K_o_N);
6121         verifyMatrix(matrixCRef, matrixCC, true);
6122 
6123         // Case: TRANSPOSE
6124         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
6125         matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
6126         matrixAC.copyFrom(mBLASData.L3_cSYR2K_A_kn);
6127         matrixBC.copyFrom(mBLASData.L3_cSYR2K_B_kn);
6128         // Reload matrix C, since it was overwritten by BLAS.
6129         matrixCC.copyFrom(mBLASData.L3_cSYR2K_C_nn);
6130 
6131         trans = ScriptIntrinsicBLAS.TRANSPOSE;
6132         mBLAS.CSYR2K(uplo, trans, alphaC, matrixAC, matrixBC, betaC, matrixCC);
6133         matrixCRef.copyFrom(mBLASData.L3_cSYR2K_o_T);
6134         verifyMatrix(matrixCRef, matrixCC, true);
6135 
6136         mRS.finish();
6137         checkError();
6138     }
6139 
test_L3_ZSYR2K_Correctness()6140     public void test_L3_ZSYR2K_Correctness() {
6141         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6142         int uplo = ScriptIntrinsicBLAS.UPPER;
6143 
6144         // Populate input allocations
6145         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
6146         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
6147         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
6148         matrixAZ.copyFrom(mBLASData.L3_zSYR2K_A_nk);
6149         matrixBZ.copyFrom(mBLASData.L3_zSYR2K_B_nk);
6150         matrixCZ.copyFrom(mBLASData.L3_zSYR2K_C_nn);
6151 
6152         // Default case: NO_TRANSPOSE
6153         mBLAS.ZSYR2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
6154         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
6155         matrixCRef.copyFrom(mBLASData.L3_zSYR2K_o_N);
6156         verifyMatrix(matrixCRef, matrixCZ, true);
6157 
6158         // Case: TRANSPOSE
6159         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
6160         matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
6161         matrixAZ.copyFrom(mBLASData.L3_zSYR2K_A_kn);
6162         matrixBZ.copyFrom(mBLASData.L3_zSYR2K_B_kn);
6163         // Reload matrix C, since it was overwritten by BLAS.
6164         matrixCZ.copyFrom(mBLASData.L3_zSYR2K_C_nn);
6165 
6166         trans = ScriptIntrinsicBLAS.TRANSPOSE;
6167         mBLAS.ZSYR2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
6168         matrixCRef.copyFrom(mBLASData.L3_zSYR2K_o_T);
6169         verifyMatrix(matrixCRef, matrixCZ, true);
6170 
6171         mRS.finish();
6172         checkError();
6173     }
6174 
6175 
validateHER2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C)6176     private boolean validateHER2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C) {
6177         if (!validateUplo(Uplo)) {
6178             return false;
6179         }
6180         if (!A.getType().getElement().isCompatible(e) ||
6181             !B.getType().getElement().isCompatible(e) ||
6182             !C.getType().getElement().isCompatible(e)) {
6183             return false;
6184         }
6185         if (!validateConjTranspose(Trans)) {
6186             return false;
6187         }
6188         int cdim = C.getType().getX();
6189         if (cdim != C.getType().getY()) {
6190             return false;
6191         }
6192         if (Trans == ScriptIntrinsicBLAS.NO_TRANSPOSE) {
6193             if (A.getType().getY() != cdim) {
6194                 return false;
6195             }
6196         } else {
6197             if (A.getType().getX() != cdim) {
6198                 return false;
6199             }
6200         }
6201         if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
6202             return false;
6203         }
6204         return true;
6205     }
6206 
xHER2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix)6207     private void xHER2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) {
6208         for (Allocation matA : mMatrix) {
6209             for (Allocation matB : mMatrix) {
6210                 for (Allocation matC : mMatrix) {
6211                     Element elemA = matA.getType().getElement();
6212                     if (validateHER2K(elemA, Uplo, Trans, matA, matB, matC)) {
6213                         try {
6214                             if (elemA.isCompatible(Element.F32_2(mRS))) {
6215                                 mBLAS.CHER2K(Uplo, Trans, alphaC, matA, matB, betaS, matC);
6216                             } else if (elemA.isCompatible(Element.F64_2(mRS))) {
6217                                 mBLAS.ZHER2K(Uplo, Trans, alphaZ, matA, matB, betaD, matC);
6218                             }
6219                         } catch (RSRuntimeException e) {
6220                             fail("should NOT throw RSRuntimeException");
6221                         }
6222                     } else {
6223                         try {
6224                             mBLAS.CHER2K(Uplo, Trans, alphaC, matA, matB, betaS, matC);
6225                             fail("should throw RSRuntimeException for CHER2K");
6226                         } catch (RSRuntimeException e) {
6227                         }
6228                         try {
6229                             mBLAS.ZHER2K(Uplo, Trans, alphaZ, matA, matB, betaD, matC);
6230                             fail("should throw RSRuntimeException for ZHER2K");
6231                         } catch (RSRuntimeException e) {
6232                         }
6233                     }
6234                 }
6235             }
6236         }
6237     }
6238 
L3_xHER2K_API(ArrayList<Allocation> mMatrix)6239     public void L3_xHER2K_API(ArrayList<Allocation> mMatrix) {
6240         for (int Uplo : mUplo) {
6241             for (int Trans : mTranspose) {
6242                 xHER2K_API_test(Uplo, Trans, mMatrix);
6243             }
6244         }
6245     }
6246 
test_L3_CHER2K_API()6247     public void test_L3_CHER2K_API() {
6248         L3_xHER2K_API(mMatrixC);
6249     }
6250 
test_L3_ZHER2K_API()6251     public void test_L3_ZHER2K_API() {
6252         L3_xHER2K_API(mMatrixZ);
6253     }
6254 
test_L3_CHER2K_Correctness()6255     public void test_L3_CHER2K_Correctness() {
6256         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6257         int uplo = ScriptIntrinsicBLAS.UPPER;
6258 
6259         // Populate input allocations
6260         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
6261         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
6262         Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
6263         matrixAC.copyFrom(mBLASData.L3_cHER2K_A_nk);
6264         matrixBC.copyFrom(mBLASData.L3_cHER2K_B_nk);
6265         matrixCC.copyFrom(mBLASData.L3_cHER2K_C_nn);
6266 
6267         // Default case: NO_TRANSPOSE
6268         mBLAS.CHER2K(uplo, trans, alphaC, matrixAC, matrixBC, betaS, matrixCC);
6269         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
6270         matrixCRef.copyFrom(mBLASData.L3_cHER2K_o_N);
6271         verifyMatrix(matrixCRef, matrixCC, true);
6272 
6273         // Case: TRANSPOSE
6274         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
6275         matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
6276         matrixAC.copyFrom(mBLASData.L3_cHER2K_A_kn);
6277         matrixBC.copyFrom(mBLASData.L3_cHER2K_B_kn);
6278         // Reload matrix C, since it was overwritten by BLAS.
6279         matrixCC.copyFrom(mBLASData.L3_cHER2K_C_nn);
6280 
6281         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
6282         mBLAS.CHER2K(uplo, trans, alphaC, matrixAC, matrixBC, betaS, matrixCC);
6283         matrixCRef.copyFrom(mBLASData.L3_cHER2K_o_H);
6284         verifyMatrix(matrixCRef, matrixCC, true);
6285 
6286         mRS.finish();
6287         checkError();
6288     }
6289 
test_L3_ZHER2K_Correctness()6290     public void test_L3_ZHER2K_Correctness() {
6291         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6292         int uplo = ScriptIntrinsicBLAS.UPPER;
6293 
6294         // Populate input allocations
6295         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
6296         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
6297         Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
6298         matrixAZ.copyFrom(mBLASData.L3_zHER2K_A_nk);
6299         matrixBZ.copyFrom(mBLASData.L3_zHER2K_B_nk);
6300         matrixCZ.copyFrom(mBLASData.L3_zHER2K_C_nn);
6301 
6302         // Default case: NO_TRANSPOSE
6303         mBLAS.ZHER2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaD, matrixCZ);
6304         Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
6305         matrixCRef.copyFrom(mBLASData.L3_zHER2K_o_N);
6306         verifyMatrix(matrixCRef, matrixCZ, true);
6307 
6308         // Case: TRANSPOSE
6309         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
6310         matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
6311         matrixAZ.copyFrom(mBLASData.L3_zHER2K_A_kn);
6312         matrixBZ.copyFrom(mBLASData.L3_zHER2K_B_kn);
6313         // Reload matrix C, since it was overwritten by BLAS.
6314         matrixCZ.copyFrom(mBLASData.L3_zHER2K_C_nn);
6315 
6316         trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
6317         mBLAS.ZHER2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaD, matrixCZ);
6318         matrixCRef.copyFrom(mBLASData.L3_zHER2K_o_H);
6319         verifyMatrix(matrixCRef, matrixCZ, true);
6320 
6321         mRS.finish();
6322         checkError();
6323     }
6324 
6325 
validateTRMM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B)6326     private boolean validateTRMM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B) {
6327         if (!validateSide(Side)) {
6328             return false;
6329         }
6330         if (!validateUplo(Uplo)) {
6331             return false;
6332         }
6333         if (!validateTranspose(TransA)) {
6334             return false;
6335         }
6336         if (!validateDiag(Diag)) {
6337             return false;
6338         }
6339         int aM = -1, aN = -1, bM = -1, bN = -1;
6340         if (!A.getType().getElement().isCompatible(e) ||
6341             !B.getType().getElement().isCompatible(e)) {
6342             return false;
6343         }
6344 
6345         aM = A.getType().getY();
6346         aN = A.getType().getX();
6347         if (aM != aN) {
6348             return false;
6349         }
6350 
6351         bM = B.getType().getY();
6352         bN = B.getType().getX();
6353         if (Side == ScriptIntrinsicBLAS.LEFT) {
6354             if (aN != bM) {
6355                 return false;
6356             }
6357         } else {
6358             if (bN != aM) {
6359                 return false;
6360             }
6361         }
6362         return true;
6363     }
6364 
xTRMM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix)6365     private void xTRMM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix) {
6366         for (Allocation matA : mMatrix) {
6367             for (Allocation matB : mMatrix) {
6368                 Element elemA = matA.getType().getElement();
6369                 if (validateTRMM(elemA, Side, Uplo, TransA, Diag, matA, matB)) {
6370                     try {
6371                         if (elemA.isCompatible(Element.F32(mRS))) {
6372                             mBLAS.STRMM(Side, Uplo, TransA, Diag, alphaS, matA, matB);
6373                         } else if (elemA.isCompatible(Element.F64(mRS))) {
6374                             mBLAS.DTRMM(Side, Uplo, TransA, Diag, alphaD, matA, matB);
6375                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
6376                             mBLAS.CTRMM(Side, Uplo, TransA, Diag, alphaC, matA, matB);
6377                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
6378                             mBLAS.ZTRMM(Side, Uplo, TransA, Diag, alphaZ, matA, matB);
6379                         }
6380                     } catch (RSRuntimeException e) {
6381                         fail("should NOT throw RSRuntimeException");
6382                     }
6383                 } else {
6384                     try {
6385                         mBLAS.STRMM(Side, Uplo, TransA, Diag, alphaS, matA, matB);
6386                         fail("should throw RSRuntimeException for STRMM");
6387                     } catch (RSRuntimeException e) {
6388                     }
6389                     try {
6390                         mBLAS.DTRMM(Side, Uplo, TransA, Diag, alphaD, matA, matB);
6391                         fail("should throw RSRuntimeException for DTRMM");
6392                     } catch (RSRuntimeException e) {
6393                     }
6394                     try {
6395                         mBLAS.CTRMM(Side, Uplo, TransA, Diag, alphaC, matA, matB);
6396                         fail("should throw RSRuntimeException for CTRMM");
6397                     } catch (RSRuntimeException e) {
6398                     }
6399                     try {
6400                         mBLAS.ZTRMM(Side, Uplo, TransA, Diag, alphaZ, matA, matB);
6401                         fail("should throw RSRuntimeException for ZTRMM");
6402                     } catch (RSRuntimeException e) {
6403                     }
6404                 }
6405             }
6406         }
6407     }
6408 
L3_xTRMM_API(ArrayList<Allocation> mMatrix)6409     public void L3_xTRMM_API(ArrayList<Allocation> mMatrix) {
6410         for (int Side : mSide) {
6411             for (int Uplo : mUplo) {
6412                 for (int TransA : mTranspose) {
6413                     for (int Diag : mDiag) {
6414                         xTRMM_API_test(Side, Uplo, TransA, Diag, mMatrix);
6415                     }
6416                 }
6417             }
6418         }
6419     }
6420 
test_L3_STRMM_API()6421     public void test_L3_STRMM_API() {
6422         L3_xTRMM_API(mMatrixS);
6423     }
6424 
test_L3_DTRMM_API()6425     public void test_L3_DTRMM_API() {
6426         L3_xTRMM_API(mMatrixD);
6427     }
6428 
test_L3_CTRMM_API()6429     public void test_L3_CTRMM_API() {
6430         L3_xTRMM_API(mMatrixC);
6431     }
6432 
test_L3_ZTRMM_API()6433     public void test_L3_ZTRMM_API() {
6434         L3_xTRMM_API(mMatrixZ);
6435     }
6436 
6437 
test_L3_STRMM_Correctness()6438     public void test_L3_STRMM_Correctness() {
6439         int side = ScriptIntrinsicBLAS.LEFT;
6440         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6441         int uplo = ScriptIntrinsicBLAS.UPPER;
6442         int diag = ScriptIntrinsicBLAS.NON_UNIT;
6443 
6444         // Populate input allocations
6445         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dM));
6446         Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
6447         matrixAS.copyFrom(mBLASData.L3_sTRMM_A_mm);
6448         matrixBS.copyFrom(mBLASData.L3_sTRMM_B_mn);
6449 
6450         // Default case: LEFT, UPPER, NO_TRANSPOSE
6451         mBLAS.STRMM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS);
6452         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
6453         matrixBRef.copyFrom(mBLASData.L3_sTRMM_o_LUN);
6454         verifyMatrix(matrixBRef, matrixBS);
6455 
6456         // Case: RIGHT, LOWER, TRANSPOSE
6457         matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
6458         matrixAS.copyFrom(mBLASData.L3_sTRMM_A_nn);
6459         // Reload matrix B, since it was overwritten by BLAS.
6460         matrixBS.copyFrom(mBLASData.L3_sTRMM_B_mn);
6461 
6462         side = ScriptIntrinsicBLAS.RIGHT;
6463         trans = ScriptIntrinsicBLAS.TRANSPOSE;
6464         uplo = ScriptIntrinsicBLAS.LOWER;
6465         mBLAS.STRMM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS);
6466         matrixBRef.copyFrom(mBLASData.L3_sTRMM_o_RLT);
6467         verifyMatrix(matrixBRef, matrixBS);
6468 
6469         mRS.finish();
6470         checkError();
6471     }
6472 
test_L3_DTRMM_Correctness()6473     public void test_L3_DTRMM_Correctness() {
6474         int side = ScriptIntrinsicBLAS.LEFT;
6475         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6476         int uplo = ScriptIntrinsicBLAS.UPPER;
6477         int diag = ScriptIntrinsicBLAS.NON_UNIT;
6478 
6479         // Populate input allocations
6480         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dM));
6481         Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
6482         matrixAD.copyFrom(mBLASData.L3_dTRMM_A_mm);
6483         matrixBD.copyFrom(mBLASData.L3_dTRMM_B_mn);
6484 
6485         // Default case: LEFT, UPPER, NO_TRANSPOSE
6486         mBLAS.DTRMM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD);
6487         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
6488         matrixBRef.copyFrom(mBLASData.L3_dTRMM_o_LUN);
6489         verifyMatrix(matrixBRef, matrixBD);
6490 
6491         // Case: RIGHT, LOWER, TRANSPOSE
6492         matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
6493         matrixAD.copyFrom(mBLASData.L3_dTRMM_A_nn);
6494         // Reload matrix B, since it was overwritten by BLAS.
6495         matrixBD.copyFrom(mBLASData.L3_dTRMM_B_mn);
6496 
6497         side = ScriptIntrinsicBLAS.RIGHT;
6498         trans = ScriptIntrinsicBLAS.TRANSPOSE;
6499         uplo = ScriptIntrinsicBLAS.LOWER;
6500         mBLAS.DTRMM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD);
6501         matrixBRef.copyFrom(mBLASData.L3_dTRMM_o_RLT);
6502         verifyMatrix(matrixBRef, matrixBD);
6503 
6504         mRS.finish();
6505         checkError();
6506     }
6507 
test_L3_CTRMM_Correctness()6508     public void test_L3_CTRMM_Correctness() {
6509         int side = ScriptIntrinsicBLAS.LEFT;
6510         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6511         int uplo = ScriptIntrinsicBLAS.UPPER;
6512         int diag = ScriptIntrinsicBLAS.NON_UNIT;
6513 
6514         // Populate input allocations
6515         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM));
6516         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
6517         matrixAC.copyFrom(mBLASData.L3_cTRMM_A_mm);
6518         matrixBC.copyFrom(mBLASData.L3_cTRMM_B_mn);
6519 
6520         // Default case: LEFT, UPPER, NO_TRANSPOSE
6521         mBLAS.CTRMM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC);
6522         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
6523         matrixBRef.copyFrom(mBLASData.L3_cTRMM_o_LUN);
6524         verifyMatrix(matrixBRef, matrixBC);
6525 
6526         // Case: RIGHT, LOWER, TRANSPOSE
6527         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
6528         matrixAC.copyFrom(mBLASData.L3_cTRMM_A_nn);
6529         // Reload matrix B, since it was overwritten by BLAS.
6530         matrixBC.copyFrom(mBLASData.L3_cTRMM_B_mn);
6531 
6532         side = ScriptIntrinsicBLAS.RIGHT;
6533         trans = ScriptIntrinsicBLAS.TRANSPOSE;
6534         uplo = ScriptIntrinsicBLAS.LOWER;
6535         mBLAS.CTRMM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC);
6536         matrixBRef.copyFrom(mBLASData.L3_cTRMM_o_RLT);
6537         verifyMatrix(matrixBRef, matrixBC);
6538 
6539         mRS.finish();
6540         checkError();
6541     }
6542 
test_L3_ZTRMM_Correctness()6543     public void test_L3_ZTRMM_Correctness() {
6544         int side = ScriptIntrinsicBLAS.LEFT;
6545         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6546         int uplo = ScriptIntrinsicBLAS.UPPER;
6547         int diag = ScriptIntrinsicBLAS.NON_UNIT;
6548 
6549         // Populate input allocations
6550         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM));
6551         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
6552         matrixAZ.copyFrom(mBLASData.L3_zTRMM_A_mm);
6553         matrixBZ.copyFrom(mBLASData.L3_zTRMM_B_mn);
6554 
6555         // Default case: LEFT, UPPER, NO_TRANSPOSE
6556         mBLAS.ZTRMM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ);
6557         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
6558         matrixBRef.copyFrom(mBLASData.L3_zTRMM_o_LUN);
6559         verifyMatrix(matrixBRef, matrixBZ);
6560 
6561         // Case: RIGHT, LOWER, TRANSPOSE
6562         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
6563         matrixAZ.copyFrom(mBLASData.L3_zTRMM_A_nn);
6564         // Reload matrix B, since it was overwritten by BLAS.
6565         matrixBZ.copyFrom(mBLASData.L3_zTRMM_B_mn);
6566 
6567         side = ScriptIntrinsicBLAS.RIGHT;
6568         trans = ScriptIntrinsicBLAS.TRANSPOSE;
6569         uplo = ScriptIntrinsicBLAS.LOWER;
6570         mBLAS.ZTRMM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ);
6571         matrixBRef.copyFrom(mBLASData.L3_zTRMM_o_RLT);
6572         verifyMatrix(matrixBRef, matrixBZ);
6573 
6574         mRS.finish();
6575         checkError();
6576     }
6577 
6578 
validateTRSM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B)6579     private boolean validateTRSM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B) {
6580         int adim = -1, bM = -1, bN = -1;
6581         if (!validateSide(Side)) {
6582             return false;
6583         }
6584         if (!validateTranspose(TransA)) {
6585             return false;
6586         }
6587         if (!validateUplo(Uplo)) {
6588             return false;
6589         }
6590         if (!validateDiag(Diag)) {
6591             return false;
6592         }
6593         if (!A.getType().getElement().isCompatible(e) ||
6594             !B.getType().getElement().isCompatible(e)) {
6595             return false;
6596         }
6597         adim = A.getType().getX();
6598         if (adim != A.getType().getY()) {
6599             // this may be unnecessary, the restriction could potentially be relaxed
6600             // A needs to contain at least that symmetric matrix but could theoretically be larger
6601             // for now we assume adapters are sufficient, will reevaluate in the future
6602             return false;
6603         }
6604         bM = B.getType().getY();
6605         bN = B.getType().getX();
6606         if (Side == ScriptIntrinsicBLAS.LEFT) {
6607             // A is M*M
6608             if (adim != bM) {
6609                 return false;
6610             }
6611         } else {
6612             // A is N*N
6613             if (adim != bN) {
6614                 return false;
6615             }
6616         }
6617         return true;
6618     }
6619 
xTRSM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix)6620     private void xTRSM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix) {
6621         for (Allocation matA : mMatrix) {
6622             for (Allocation matB : mMatrix) {
6623                 Element elemA = matA.getType().getElement();
6624                 if (validateTRSM(elemA, Side, Uplo, TransA, Diag, matA, matB)) {
6625                     try {
6626                         if (elemA.isCompatible(Element.F32(mRS))) {
6627                             mBLAS.STRSM(Side, Uplo, TransA, Diag, alphaS, matA, matB);
6628                         } else if (elemA.isCompatible(Element.F64(mRS))) {
6629                             mBLAS.DTRSM(Side, Uplo, TransA, Diag, alphaD, matA, matB);
6630                         } else if (elemA.isCompatible(Element.F32_2(mRS))) {
6631                             mBLAS.CTRSM(Side, Uplo, TransA, Diag, alphaC, matA, matB);
6632                         } else if (elemA.isCompatible(Element.F64_2(mRS))) {
6633                             mBLAS.ZTRSM(Side, Uplo, TransA, Diag, alphaZ, matA, matB);
6634                         }
6635                     } catch (RSRuntimeException e) {
6636                         fail("should NOT throw RSRuntimeException");
6637                     }
6638                 } else {
6639                     try {
6640                         mBLAS.STRSM(Side, Uplo, TransA, Diag, alphaS, matA, matB);
6641                         fail("should throw RSRuntimeException for STRSM");
6642                     } catch (RSRuntimeException e) {
6643                     }
6644                     try {
6645                         mBLAS.DTRSM(Side, Uplo, TransA, Diag, alphaD, matA, matB);
6646                         fail("should throw RSRuntimeException for DTRSM");
6647                     } catch (RSRuntimeException e) {
6648                     }
6649                     try {
6650                         mBLAS.CTRSM(Side, Uplo, TransA, Diag, alphaC, matA, matB);
6651                         fail("should throw RSRuntimeException for CTRSM");
6652                     } catch (RSRuntimeException e) {
6653                     }
6654                     try {
6655                         mBLAS.ZTRSM(Side, Uplo, TransA, Diag, alphaZ, matA, matB);
6656                         fail("should throw RSRuntimeException for ZTRSM");
6657                     } catch (RSRuntimeException e) {
6658                     }
6659                 }
6660             }
6661         }
6662     }
6663 
L3_xTRSM_API(ArrayList<Allocation> mMatrix)6664     public void L3_xTRSM_API(ArrayList<Allocation> mMatrix) {
6665         for (int Side : mSide) {
6666             for (int Uplo : mUplo) {
6667                 for (int TransA : mTranspose) {
6668                     for (int Diag : mDiag) {
6669                         xTRSM_API_test(Side, Uplo, TransA, Diag, mMatrix);
6670                     }
6671                 }
6672             }
6673         }
6674     }
6675 
test_L3_STRSM_API()6676     public void test_L3_STRSM_API() {
6677         L3_xTRSM_API(mMatrixS);
6678     }
6679 
test_L3_DTRSM_API()6680     public void test_L3_DTRSM_API() {
6681         L3_xTRSM_API(mMatrixD);
6682     }
6683 
test_L3_CTRSM_API()6684     public void test_L3_CTRSM_API() {
6685         L3_xTRSM_API(mMatrixC);
6686     }
6687 
test_L3_ZTRSM_API()6688     public void test_L3_ZTRSM_API() {
6689         L3_xTRSM_API(mMatrixZ);
6690     }
6691 
test_L3_STRSM_Correctness()6692     public void test_L3_STRSM_Correctness() {
6693         int side = ScriptIntrinsicBLAS.LEFT;
6694         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6695         int uplo = ScriptIntrinsicBLAS.UPPER;
6696         int diag = ScriptIntrinsicBLAS.NON_UNIT;
6697 
6698         // Populate input allocations
6699         Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dM));
6700         Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
6701         matrixAS.copyFrom(mBLASData.L3_sTRSM_A_mm);
6702         matrixBS.copyFrom(mBLASData.L3_sTRSM_B_mn);
6703 
6704         // Default case: LEFT, UPPER, NO_TRANSPOSE
6705         mBLAS.STRSM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS);
6706         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
6707         matrixBRef.copyFrom(mBLASData.L3_sTRSM_o_LUN);
6708         verifyMatrix(matrixBRef, matrixBS);
6709 
6710         // Case: RIGHT, LOWER, TRANSPOSE
6711         matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
6712         matrixAS.copyFrom(mBLASData.L3_sTRSM_A_nn);
6713         // Reload matrix B, since it was overwritten by BLAS.
6714         matrixBS.copyFrom(mBLASData.L3_sTRSM_B_mn);
6715 
6716         side = ScriptIntrinsicBLAS.RIGHT;
6717         trans = ScriptIntrinsicBLAS.TRANSPOSE;
6718         uplo = ScriptIntrinsicBLAS.LOWER;
6719         mBLAS.STRSM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS);
6720         matrixBRef.copyFrom(mBLASData.L3_sTRSM_o_RLT);
6721         verifyMatrix(matrixBRef, matrixBS);
6722 
6723         mRS.finish();
6724         checkError();
6725     }
6726 
test_L3_DTRSM_Correctness()6727     public void test_L3_DTRSM_Correctness() {
6728         int side = ScriptIntrinsicBLAS.LEFT;
6729         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6730         int uplo = ScriptIntrinsicBLAS.UPPER;
6731         int diag = ScriptIntrinsicBLAS.NON_UNIT;
6732 
6733         // Populate input allocations
6734         Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dM));
6735         Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
6736         matrixAD.copyFrom(mBLASData.L3_dTRSM_A_mm);
6737         matrixBD.copyFrom(mBLASData.L3_dTRSM_B_mn);
6738 
6739         // Default case: LEFT, UPPER, NO_TRANSPOSE
6740         mBLAS.DTRSM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD);
6741         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
6742         matrixBRef.copyFrom(mBLASData.L3_dTRSM_o_LUN);
6743         verifyMatrix(matrixBRef, matrixBD);
6744 
6745         // Case: RIGHT, LOWER, TRANSPOSE
6746         matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
6747         matrixAD.copyFrom(mBLASData.L3_dTRSM_A_nn);
6748         // Reload matrix B, since it was overwritten by BLAS.
6749         matrixBD.copyFrom(mBLASData.L3_dTRSM_B_mn);
6750 
6751         side = ScriptIntrinsicBLAS.RIGHT;
6752         trans = ScriptIntrinsicBLAS.TRANSPOSE;
6753         uplo = ScriptIntrinsicBLAS.LOWER;
6754         mBLAS.DTRSM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD);
6755         matrixBRef.copyFrom(mBLASData.L3_dTRSM_o_RLT);
6756         verifyMatrix(matrixBRef, matrixBD);
6757 
6758         mRS.finish();
6759         checkError();
6760     }
6761 
test_L3_CTRSM_Correctness()6762     public void test_L3_CTRSM_Correctness() {
6763         int side = ScriptIntrinsicBLAS.LEFT;
6764         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6765         int uplo = ScriptIntrinsicBLAS.UPPER;
6766         int diag = ScriptIntrinsicBLAS.NON_UNIT;
6767 
6768         // Populate input allocations
6769         Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM));
6770         Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
6771         matrixAC.copyFrom(mBLASData.L3_cTRSM_A_mm);
6772         matrixBC.copyFrom(mBLASData.L3_cTRSM_B_mn);
6773 
6774         // Default case: LEFT, UPPER, NO_TRANSPOSE
6775         mBLAS.CTRSM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC);
6776         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
6777         matrixBRef.copyFrom(mBLASData.L3_cTRSM_o_LUN);
6778         verifyMatrix(matrixBRef, matrixBC);
6779 
6780         // Case: RIGHT, LOWER, TRANSPOSE
6781         matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
6782         matrixAC.copyFrom(mBLASData.L3_cTRSM_A_nn);
6783         // Reload matrix B, since it was overwritten by BLAS.
6784         matrixBC.copyFrom(mBLASData.L3_cTRSM_B_mn);
6785 
6786         side = ScriptIntrinsicBLAS.RIGHT;
6787         trans = ScriptIntrinsicBLAS.TRANSPOSE;
6788         uplo = ScriptIntrinsicBLAS.LOWER;
6789         mBLAS.CTRSM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC);
6790         matrixBRef.copyFrom(mBLASData.L3_cTRSM_o_RLT);
6791         verifyMatrix(matrixBRef, matrixBC);
6792 
6793         mRS.finish();
6794         checkError();
6795     }
6796 
test_L3_ZTRSM_Correctness()6797     public void test_L3_ZTRSM_Correctness() {
6798         int side = ScriptIntrinsicBLAS.LEFT;
6799         int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
6800         int uplo = ScriptIntrinsicBLAS.UPPER;
6801         int diag = ScriptIntrinsicBLAS.NON_UNIT;
6802 
6803         // Populate input allocations
6804         Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM));
6805         Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
6806         matrixAZ.copyFrom(mBLASData.L3_zTRSM_A_mm);
6807         matrixBZ.copyFrom(mBLASData.L3_zTRSM_B_mn);
6808 
6809         // Default case: LEFT, UPPER, NO_TRANSPOSE
6810         mBLAS.ZTRSM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ);
6811         Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
6812         matrixBRef.copyFrom(mBLASData.L3_zTRSM_o_LUN);
6813         verifyMatrix(matrixBRef, matrixBZ);
6814 
6815         // Case: RIGHT, LOWER, TRANSPOSE
6816         matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
6817         matrixAZ.copyFrom(mBLASData.L3_zTRSM_A_nn);
6818         // Reload matrix B, since it was overwritten by BLAS.
6819         matrixBZ.copyFrom(mBLASData.L3_zTRSM_B_mn);
6820 
6821         side = ScriptIntrinsicBLAS.RIGHT;
6822         trans = ScriptIntrinsicBLAS.TRANSPOSE;
6823         uplo = ScriptIntrinsicBLAS.LOWER;
6824         mBLAS.ZTRSM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ);
6825         matrixBRef.copyFrom(mBLASData.L3_zTRSM_o_RLT);
6826         verifyMatrix(matrixBRef, matrixBZ);
6827 
6828         mRS.finish();
6829         checkError();
6830     }
6831 }
6832